From aa8d9ecac6399607c0034631258b9fd3ac5111c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=99=9A=E9=A3=8E=E6=8B=82=E6=9F=B3=E9=A2=9C?= <434857005@qq.com> Date: Wed, 7 Jun 2023 14:43:03 +0800 Subject: [PATCH] =?UTF-8?q?=E5=8F=91=E5=B8=83=E6=96=B0=E7=89=88=E6=9C=AC?= =?UTF-8?q?=EF=BC=8Cdrpy=E6=BA=90=E5=85=A8=E9=9D=A2=E6=94=AF=E6=8C=81hostJ?= =?UTF-8?q?s=EF=BC=8C=E5=85=B7=E4=BD=93=E5=86=99=E6=B3=95=E5=8F=82?= =?UTF-8?q?=E8=80=838k=E5=BD=B1=E8=A7=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- controllers/cms.py | 518 +++++++++++++++++++++++++-------------------- doc/写源教程.md | 1 + doc/更新日志.md | 8 + js/8K影视.js | 1 + js/drpy.txt | 2 +- js/version.txt | 2 +- libs/drpy.js | 2 +- libs/drpy2.js | 2 +- libs/pre.js | 5 +- 9 files changed, 306 insertions(+), 235 deletions(-) diff --git a/controllers/cms.py b/controllers/cms.py index aac5a60..ff2178b 100644 --- a/controllers/cms.py +++ b/controllers/cms.py @@ -14,105 +14,118 @@ from utils.web import * from utils.system import getHost from utils.config import playerConfig from utils.log import logger -from utils.encode import base64Encode,base64Decode,fetch,post,request,getCryptoJS,getPreJs,buildUrl,getHome,atob,btoa -from utils.encode import verifyCode,setDetail,join,urljoin2,parseText,requireCache,forceOrder,base64ToImage,encodeStr, decodeStr +from utils.encode import base64Encode, base64Decode, fetch, post, request, getCryptoJS, getPreJs, buildUrl, getHome, \ + atob, btoa +from utils.encode import verifyCode, setDetail, join, urljoin2, parseText, requireCache, forceOrder, base64ToImage, \ + encodeStr, decodeStr from utils.encode import md5 as mmd5 from utils.safePython import safePython -from utils.parser import runPy,runJScode,JsObjectWrapper,PyJsObject,PyJsString +from utils.parser import runPy, runJScode, JsObjectWrapper, PyJsObject, PyJsString from utils.htmlParser import jsoup -from urllib.parse import urljoin,quote,unquote +from urllib.parse import urljoin, quote, unquote from concurrent.futures import ThreadPoolExecutor # 引入线程池 -from flask import url_for,redirect,render_template_string +from flask import url_for, redirect, render_template_string from easydict import EasyDict as edict from controllers.service import storage_service -def setItem(key,value): - lsg = storage_service() - if isinstance(key,PyJsString): - key = parseText(str(key)) - if isinstance(value,PyJsString): - value = parseText(str(value)) - return lsg.setItem(key,value) -def getItem(key,value=''): +def setItem(key, value): lsg = storage_service() - if isinstance(key,PyJsString): + if isinstance(key, PyJsString): key = parseText(str(key)) - if isinstance(value,PyJsString): + if isinstance(value, PyJsString): value = parseText(str(value)) - return lsg.getItem(key,value) + return lsg.setItem(key, value) + + +def getItem(key, value=''): + lsg = storage_service() + if isinstance(key, PyJsString): + key = parseText(str(key)) + if isinstance(value, PyJsString): + value = parseText(str(value)) + return lsg.getItem(key, value) + def clearItem(key): lsg = storage_service() - if isinstance(key,PyJsString): + if isinstance(key, PyJsString): key = parseText(str(key)) return lsg.clearItem(key) + def encodeUrl(url): # return base64Encode(quote(url)) # return base64Encode(url) # print(type(url)) - if isinstance(url,PyJsString): + if isinstance(url, PyJsString): # obj = obj.to_dict() url = parseText(str(url)) return quote(url) + def stringify(obj): - if isinstance(obj,PyJsObject): + if isinstance(obj, PyJsObject): # obj = obj.to_dict() obj = parseText(str(obj)) return json.dumps(obj, separators=(',', ':'), ensure_ascii=False) + def requireObj(url): - if isinstance(url,PyJsString): + if isinstance(url, PyJsString): url = parseText(str(url)) return requireCache(url) + def md5(text): - if isinstance(text,PyJsString): + if isinstance(text, PyJsString): text = parseText(str(text)) return mmd5(text) + py_ctx = { -'requests':requests,'print':print,'base64Encode':base64Encode,'base64Decode':base64Decode, -'log':logger.info,'fetch':fetch,'post':post,'request':request,'getCryptoJS':getCryptoJS, -'buildUrl':buildUrl,'getHome':getHome,'setDetail':setDetail,'join':join,'urljoin2':urljoin2, -'PC_UA':PC_UA,'MOBILE_UA':MOBILE_UA,'UC_UA':UC_UA,'UA':UA,'IOS_UA':IOS_UA, -'setItem':setItem,'getItem':getItem,'clearItem':clearItem,'stringify':stringify,'encodeUrl':encodeUrl, -'requireObj':requireObj,'md5':md5,'atob': atob, 'btoa':btoa,'base64ToImage': base64ToImage, 'encodeStr': encodeStr, + 'requests': requests, 'print': print, 'base64Encode': base64Encode, 'base64Decode': base64Decode, + 'log': logger.info, 'fetch': fetch, 'post': post, 'request': request, 'getCryptoJS': getCryptoJS, + 'buildUrl': buildUrl, 'getHome': getHome, 'setDetail': setDetail, 'join': join, 'urljoin2': urljoin2, + 'PC_UA': PC_UA, 'MOBILE_UA': MOBILE_UA, 'UC_UA': UC_UA, 'UA': UA, 'IOS_UA': IOS_UA, + 'setItem': setItem, 'getItem': getItem, 'clearItem': clearItem, 'stringify': stringify, 'encodeUrl': encodeUrl, + 'requireObj': requireObj, 'md5': md5, 'atob': atob, 'btoa': btoa, 'base64ToImage': base64ToImage, + 'encodeStr': encodeStr, 'decodeStr': decodeStr } + + # print(getCryptoJS()) class CMS: - def __init__(self, rule, db=None, RuleClass=None, PlayParse=None,new_conf=None,ext=''): + def __init__(self, rule, db=None, RuleClass=None, PlayParse=None, new_conf=None, ext=''): if new_conf is None: new_conf = {} self.lsg = storage_service() self.title = rule.get('title', '') self.id = rule.get('id', self.title) - self.filter_url = rule.get('filter_url', '').replace('{{fl}}','{{fl|safe}}') # python jinjia2禁用自动编码 - cate_exclude = rule.get('cate_exclude','') - tab_exclude = rule.get('tab_exclude','') + self.filter_url = rule.get('filter_url', '').replace('{{fl}}', '{{fl|safe}}') # python jinjia2禁用自动编码 + cate_exclude = rule.get('cate_exclude', '') + tab_exclude = rule.get('tab_exclude', '') self.lazy = rule.get('lazy', False) # self.play_disable = new_conf.get('PLAY_DISABLE',False) - self.play_disable = self.lsg.getItem('PLAY_DISABLE',False) - self.retry_count = new_conf.get('RETRY_CNT',3) + self.play_disable = self.lsg.getItem('PLAY_DISABLE', False) + self.retry_count = new_conf.get('RETRY_CNT', 3) # self.lazy_mode = new_conf.get('LAZYPARSE_MODE') - self.lazy_mode = self.lsg.getItem('LAZYPARSE_MODE',2) + self.lazy_mode = self.lsg.getItem('LAZYPARSE_MODE', 2) self.ocr_api = new_conf.get('OCR_API') # self.cate_exclude = new_conf.get('CATE_EXCLUDE','') - self.cate_exclude = self.lsg.getItem('CATE_EXCLUDE','') + self.cate_exclude = self.lsg.getItem('CATE_EXCLUDE', '') # self.tab_exclude = new_conf.get('TAB_EXCLUDE','') - self.tab_exclude = self.lsg.getItem('TAB_EXCLUDE','') + self.tab_exclude = self.lsg.getItem('TAB_EXCLUDE', '') if cate_exclude: if not str(cate_exclude).startswith('|') and not str(self.cate_exclude).endswith('|'): - self.cate_exclude = self.cate_exclude+'|'+cate_exclude + self.cate_exclude = self.cate_exclude + '|' + cate_exclude else: self.cate_exclude += cate_exclude if tab_exclude: if not str(tab_exclude).startswith('|') and not str(self.tab_exclude).endswith('|'): - self.tab_exclude = self.tab_exclude+'|'+tab_exclude + self.tab_exclude = self.tab_exclude + '|' + tab_exclude else: self.tab_exclude += tab_exclude # print(self.cate_exclude) @@ -126,10 +139,10 @@ class CMS: try: play_url = getHost(self.lazy_mode) except: - play_url = getHost(1,5705) + play_url = getHost(1, 5705) # play_url = new_conf.get('PLAY_URL',getHost(2)) if not play_url.startswith('http'): - play_url = 'http://'+play_url + play_url = 'http://' + play_url # print(play_url) if self.play_parse: # self.play_url = play_url + self.vod + '?play_url=' @@ -143,20 +156,46 @@ class CMS: else: self.play_parse = False self.play_url = '' - logger.info('播放免嗅地址: '+self.play_url) + logger.info('播放免嗅地址: ' + self.play_url) self.db = db self.RuleClass = RuleClass self.PlayParse = PlayParse - host = rule.get('host','').rstrip('/') + host = rule.get('host', '').rstrip('/') host = unquote(host) - timeout = rule.get('timeout',5000) - homeUrl = rule.get('homeUrl','/') - url = rule.get('url','') - detailUrl = rule.get('detailUrl','') - searchUrl = rule.get('searchUrl','') + HOST = host + hostJs = rule.get('hostJs', '') + if hostJs: + try: + jsp = jsoup(HOST) + py_ctx.update({ + 'HOST': HOST, + 'jsp': jsp, + 'jq': jsp, + 'TYPE': 'init', + }) + ctx = py_ctx + jscode = getPreJs() + hostJs.strip().replace('js:', '', 1) + # print(jscode) + loader, _ = runJScode(jscode, ctx=ctx) + # print(loader.toString()) + HOST = loader.eval('HOST') + # print(vods) + # 一般都是正常的str + if isinstance(HOST, PyJsString): # JsObjectWrapper + HOST = parseText(str(HOST)) + host = HOST.rstrip('/') + print('host:',host) + except Exception as e: + logger.info(f'执行{hostJs}获取host发生错误:{e}') + + timeout = rule.get('timeout', 5000) + homeUrl = rule.get('homeUrl', '/') + url = rule.get('url', '') + detailUrl = rule.get('detailUrl', '') + searchUrl = rule.get('searchUrl', '') default_headers = getHeaders(host) - self_headers = rule.get('headers',{}) + self_headers = rule.get('headers', {}) default_headers.update(self_headers) headers = default_headers cookie = self.getCookie() @@ -165,10 +204,10 @@ class CMS: if cookie: headers['cookie'] = cookie self.oheaders['cookie'] = cookie - limit = rule.get('limit',6) + limit = rule.get('limit', 6) encoding = rule.get('编码', 'utf-8') search_encoding = rule.get('搜索编码', '') - self.limit = min(limit,30) + self.limit = min(limit, 30) keys = headers.keys() for k in headers.keys(): if str(k).lower() == 'user-agent': @@ -191,7 +230,7 @@ class CMS: logger.info(f'从{v}获取cookie发生错误:{e}') pass - lower_keys = list(map(lambda x:x.lower(),keys)) + lower_keys = list(map(lambda x: x.lower(), keys)) if not 'user-agent' in lower_keys: headers['User-Agent'] = UA if not 'referer' in lower_keys: @@ -199,37 +238,37 @@ class CMS: self.headers = headers # print(headers) self.host = host - self.homeUrl = urljoin(host,homeUrl) if host and homeUrl else homeUrl or host - if url.find('[') >-1 and url.find(']') > -1: + self.homeUrl = urljoin(host, homeUrl) if host and homeUrl else homeUrl or host + if url.find('[') > -1 and url.find(']') > -1: u1 = url.split('[')[0] u2 = url.split('[')[1].split(']')[0] - self.url = urljoin(host,u1)+'['+urljoin(host,u2)+']' if host and url else url + self.url = urljoin(host, u1) + '[' + urljoin(host, u2) + ']' if host and url else url else: self.url = urljoin(host, url) if host and url else url - self.detailUrl = urljoin(host,detailUrl) if host and detailUrl else detailUrl - self.searchUrl = urljoin(host,searchUrl) if host and searchUrl else searchUrl - self.class_name = rule.get('class_name','') - self.class_url = rule.get('class_url','') - self.class_parse = rule.get('class_parse','') + self.detailUrl = urljoin(host, detailUrl) if host and detailUrl else detailUrl + self.searchUrl = urljoin(host, searchUrl) if host and searchUrl else searchUrl + self.class_name = rule.get('class_name', '') + self.class_url = rule.get('class_url', '') + self.class_parse = rule.get('class_parse', '') self.filter_name = rule.get('filter_name', '') self.filter_url = rule.get('filter_url', '') self.filter_parse = rule.get('filter_parse', '') - self.double = rule.get('double',False) - self.一级 = rule.get('一级','') - self.二级 = rule.get('二级','') - self.二级访问前 = rule.get('二级访问前','') - self.搜索 = rule.get('搜索','') - self.推荐 = rule.get('推荐','') - self.图片来源 = rule.get('图片来源','') + self.double = rule.get('double', False) + self.一级 = rule.get('一级', '') + self.二级 = rule.get('二级', '') + self.二级访问前 = rule.get('二级访问前', '') + self.搜索 = rule.get('搜索', '') + self.推荐 = rule.get('推荐', '') + self.图片来源 = rule.get('图片来源', '') self.encoding = encoding self.search_encoding = search_encoding - self.timeout = round(int(timeout)/1000,2) - self.filter = rule.get('filter',[]) - self.filter_def = rule.get('filter_def',{}) + self.timeout = round(int(timeout) / 1000, 2) + self.filter = rule.get('filter', []) + self.filter_def = rule.get('filter_def', {}) self.play_json = rule['play_json'] if 'play_json' in rule else [] self.pagecount = rule['pagecount'] if 'pagecount' in rule else {} - self.extend = rule.get('extend',[]) + self.extend = rule.get('extend', []) self.d = self.getObject() def getName(self): @@ -237,25 +276,25 @@ class CMS: def getObject(self): o = edict({ - 'jsp':jsoup(self.url), - 'getParse':self.getParse, - 'saveParse':self.saveParse, - 'oheaders':self.oheaders, - 'headers':self.headers, # 通用免嗅需要 - 'encoding':self.encoding, - 'name':self.title, - 'timeout':self.timeout, + 'jsp': jsoup(self.url), + 'getParse': self.getParse, + 'saveParse': self.saveParse, + 'oheaders': self.oheaders, + 'headers': self.headers, # 通用免嗅需要 + 'encoding': self.encoding, + 'name': self.title, + 'timeout': self.timeout, }) return o - def regexp(self,prule,text,pos=None): - ret = re.search(prule,text).groups() - if pos != None and isinstance(pos,int): + def regexp(self, prule, text, pos=None): + ret = re.search(prule, text).groups() + if pos != None and isinstance(pos, int): return ret[pos] else: return ret - def test(self,text,string): + def test(self, text, string): searchObj = re.search(rf'{text}', string, re.M | re.I) # print(searchObj) # global vflag @@ -274,7 +313,7 @@ class CMS: return { "vod_id": "id", "vod_name": "片名", - "vod_pic": "",# 图片 + "vod_pic": "", # 图片 "type_name": "剧情", "vod_year": "年份", "vod_area": "地区", @@ -294,7 +333,7 @@ class CMS: pj = jsp.pj pq = jsp.pq - return pdfh,pdfa,pd,pq + return pdfh, pdfa, pd, pq def getClasses(self): if not self.db: @@ -307,11 +346,11 @@ class CMS: res = self.db.session.query(self.RuleClass).filter(self.RuleClass.name == name).first() # _logger.info('xxxxxx') if res: - if not all([res.class_name,res.class_url]): + if not all([res.class_name, res.class_url]): return [] cls = res.class_name.split('&') cls2 = res.class_url.split('&') - classes = [{'type_name':cls[i],'type_id':cls2[i]} for i in range(len(cls))] + classes = [{'type_name': cls[i], 'type_id': cls2[i]} for i in range(len(cls))] # _logger.info(classes) logger.info(f"{self.getName()}使用缓存分类:{classes}") return classes @@ -330,7 +369,7 @@ class CMS: else: return None - def saveCookie(self,cookie): + def saveCookie(self, cookie): name = self.getName() if not self.db: msg = f'{name}未提供数据库连接' @@ -379,7 +418,7 @@ class CMS: except Exception as e: return f'发生了错误:{e}' - def getParse(self,play_url): + def getParse(self, play_url): if not self.db: msg = '未提供数据库连接' print(msg) @@ -396,26 +435,26 @@ class CMS: else: return '' - def dealJson(self,html): + def dealJson(self, html): try: # res = re.search('.*?\{(.*)\}',html,re.M|re.I).groups()[0] - res = re.search('.*?\{(.*)\}',html,re.M|re.S).groups()[0] + res = re.search('.*?\{(.*)\}', html, re.M | re.S).groups()[0] html = '{' + res + '}' return html except: return html - def checkHtml(self,r): + def checkHtml(self, r): r.encoding = self.encoding html = r.text if html.find('?btwaf=') > -1: - btwaf = re.search('btwaf(.*?)"',html,re.M|re.I).groups()[0] - url = r.url.split('#')[0]+'?btwaf'+btwaf + btwaf = re.search('btwaf(.*?)"', html, re.M | re.I).groups()[0] + url = r.url.split('#')[0] + '?btwaf' + btwaf # print(f'需要过宝塔验证:{url}') cookies_dict = requests.utils.dict_from_cookiejar(r.cookies) cookie_str = ';'.join([f'{k}={cookies_dict[k]}' for k in cookies_dict]) self.headers['cookie'] = cookie_str - r = requests.get(url, headers=self.headers, timeout=self.timeout,verify=False) + r = requests.get(url, headers=self.headers, timeout=self.timeout, verify=False) r.encoding = self.encoding html = r.text if html.find('?btwaf=') < 0: @@ -424,7 +463,7 @@ class CMS: # print(html) return html - def saveParse(self, play_url,real_url): + def saveParse(self, play_url, real_url): if not self.db: msg = '未提供数据库连接' print(msg) @@ -451,8 +490,7 @@ class CMS: except Exception as e: return f'{name}发生了错误:{e}' - - def homeContent(self,fypage=1): + def homeContent(self, fypage=1): # yanaifei # https://yanetflix.com/vodtype/dianying.html t1 = time() @@ -489,7 +527,7 @@ class CMS: pass else: new_classes = [] - r = requests.get(self.homeUrl, headers=self.headers, timeout=self.timeout,verify=False) + r = requests.get(self.homeUrl, headers=self.headers, timeout=self.timeout, verify=False) html = self.checkHtml(r) # print(html) # print(self.headers) @@ -501,7 +539,7 @@ class CMS: pdfh = jsp.pdfh pdfa = jsp.pdfa pd = jsp.pd - items = pdfa(html,p[0]) + items = pdfa(html, p[0]) # print(len(items)) # print(items) for item in items: @@ -514,7 +552,7 @@ class CMS: tag = url if len(p) > 3 and p[3].strip(): try: - tag = self.regexp(p[3].strip(),url,0) + tag = self.regexp(p[3].strip(), url, 0) except: logger.info(f'分类匹配错误:{title}对应的链接{url}无法匹配{p[3]}') continue @@ -525,13 +563,15 @@ class CMS: if len(new_classes) > 0: classes.extend(new_classes) self.saveClass(classes) - video_result = self.homeVideoContent(html,fypage) + video_result = self.homeVideoContent(html, fypage) except Exception as e: logger.info(f'{self.getName()}主页发生错误:{e}') - classes = list(filter(lambda x:not self.cate_exclude or not jsoup(self.url).test(self.cate_exclude, x['type_name']),classes)) + classes = list( + filter(lambda x: not self.cate_exclude or not jsoup(self.url).test(self.cate_exclude, x['type_name']), + classes)) result['class'] = classes if self.filter: - if isinstance(self.filter,dict): + if isinstance(self.filter, dict): result['filters'] = self.filter else: result['filters'] = playerConfig['filter'] @@ -540,7 +580,7 @@ class CMS: logger.info(f'{self.getName()}获取首页总耗时(包含读取缓存):{get_interval(t1)}毫秒') return result - def homeVideoContent(self,html,fypage=1): + def homeVideoContent(self, html, fypage=1): p = self.推荐 if not p: return self.blank() @@ -555,12 +595,12 @@ class CMS: 'input': self.homeUrl, 'HOST': self.host, 'TYPE': 'home', # 海阔js环境标志 - 'oheaders':self.d.oheaders, + 'oheaders': self.d.oheaders, 'fetch_params': {'headers': self.headers, 'timeout': self.d.timeout, 'encoding': self.d.encoding}, 'd': self.d, 'getParse': self.d.getParse, 'saveParse': self.d.saveParse, - 'jsp': jsp,'jq':jsp,'setDetail': setDetail, + 'jsp': jsp, 'jq': jsp, 'setDetail': setDetail, }) ctx = py_ctx jscode = getPreJs() + p.strip().replace('js:', '', 1) @@ -586,13 +626,15 @@ class CMS: return self.blank() jsp = jsoup(self.homeUrl) pp = self.一级.split(';') - def getPP(p,pn,pp,ppn): + + def getPP(p, pn, pp, ppn): try: ps = pp[ppn] if p[pn] == '*' and len(pp) > ppn else p[pn] return ps except Exception as e: return '' - p0 = getPP(p,0,pp,0) + + p0 = getPP(p, 0, pp, 0) is_json = str(p0).startswith('json:') if is_json: html = self.dealJson(html) @@ -603,7 +645,7 @@ class CMS: # print(html) try: if self.double: - items = pdfa(html, p0.replace('json:','')) + items = pdfa(html, p0.replace('json:', '')) # print(p[0]) # print(items) # print(len(items)) @@ -614,7 +656,7 @@ class CMS: p5 = getPP(p, 5, pp, 4) p6 = getPP(p, 6, pp, 5) for item in items: - items2 = pdfa(item,p1) + items2 = pdfa(item, p1) # print(len(items2)) for item2 in items2: try: @@ -628,7 +670,8 @@ class CMS: desc = pdfh(item2, p4) except: desc = '' - links = [pd(item2, _p5) if not self.detailUrl else pdfh(item2, _p5) for _p5 in p5.split('+')] + links = [pd(item2, _p5) if not self.detailUrl else pdfh(item2, _p5) for _p5 in + p5.split('+')] vid = '$'.join(links) if len(p) > 6 and p[6]: content = pdfh(item2, p6) @@ -641,7 +684,7 @@ class CMS: "vod_name": title, "vod_pic": img, "vod_remarks": desc, - "no_use":{ + "no_use": { "vod_content": content, "type_id": 1, "type_name": "首页推荐", @@ -650,7 +693,7 @@ class CMS: except: pass else: - items = pdfa(html, p0.replace('json:','')) + items = pdfa(html, p0.replace('json:', '')) # print(items) p1 = getPP(p, 1, pp, 1) p2 = getPP(p, 2, pp, 2) @@ -698,7 +741,7 @@ class CMS: return self.blank() if self.图片来源: for video in videos: - if video.get('vod_pic','') and str(video['vod_pic']).startswith('http'): + if video.get('vod_pic', '') and str(video['vod_pic']).startswith('http'): video['vod_pic'] = f"{video['vod_pic']}{self.图片来源}" result['list'] = videos # print(videos) @@ -726,9 +769,9 @@ class CMS: if fl is None: fl = {} # print(f'fl:{fl}') - if self.filter_def and isinstance(self.filter_def,dict): + if self.filter_def and isinstance(self.filter_def, dict): try: - if self.filter_def.get(fyclass) and isinstance(self.filter_def[fyclass],dict): + if self.filter_def.get(fyclass) and isinstance(self.filter_def[fyclass], dict): self_filter_def = self.filter_def[fyclass] filter_def = ujson.loads(ujson.dumps(self_filter_def)) filter_def.update(fl) @@ -748,20 +791,20 @@ class CMS: # url = self.url + '/{0}.html'.format t1 = time() pg = str(fypage) - url = self.url.replace('fyclass',fyclass) - if fypage == 1 and self.test('[\[\]]',url): + url = self.url.replace('fyclass', fyclass) + if fypage == 1 and self.test('[\[\]]', url): url = url.split('[')[1].split(']')[0] - elif fypage > 1 and self.test('[\[\]]',url): + elif fypage > 1 and self.test('[\[\]]', url): url = url.split('[')[0] if self.filter_url: - if not 'fyfilter' in url: # 第一种情况,默认不写fyfilter关键字,视为直接拼接在链接后面当参数 + if not 'fyfilter' in url: # 第一种情况,默认不写fyfilter关键字,视为直接拼接在链接后面当参数 if not url.endswith('&') and not self.filter_url.startswith('&'): url += '&' url += self.filter_url - else: # 第二种情况直接替换关键字为待拼接的结果后面渲染,适用于 ----fypage.html的情况 + else: # 第二种情况直接替换关键字为待拼接的结果后面渲染,适用于 ----fypage.html的情况 url = url.replace('fyfilter', self.filter_url) # print(f'url渲染:{url}') - url = render_template_string(url,fl=fl) + url = render_template_string(url, fl=fl) # fl_url = render_template_string(self.filter_url,fl=fl) # if not 'fyfilter' in url: # 第一种情况,默认不写fyfilter关键字,视为直接拼接在链接后面当参数 # if not url.endswith('&') and not fl_url.startswith('&'): @@ -774,17 +817,17 @@ class CMS: # url_rep = url[url.find('('):url.find(')')+1] # cnt_page = url.split('(')[1].split(')')[0].replace('fypage',pg) # print(url_rep) - url_rep = re.search('.*?\((.*)\)',url,re.M|re.S).groups()[0] + url_rep = re.search('.*?\((.*)\)', url, re.M | re.S).groups()[0] cnt_page = url_rep.replace('fypage', pg) # print(url_rep) # print(cnt_page) cnt_ctx = {} exec(f'cnt_pg={cnt_page}', cnt_ctx) - cnt_pg = str(cnt_ctx['cnt_pg']) # 计算表达式的结果 - url = url.replace(url_rep,str(cnt_pg)).replace('(','').replace(')','') + cnt_pg = str(cnt_ctx['cnt_pg']) # 计算表达式的结果 + url = url.replace(url_rep, str(cnt_pg)).replace('(', '').replace(')', '') # print(url) else: - url = url.replace('fypage',pg) + url = url.replace('fypage', pg) # print(url) logger.info(url) @@ -800,13 +843,13 @@ class CMS: 'oheaders': self.d.oheaders, 'fetch_params': {'headers': self.headers, 'timeout': self.d.timeout, 'encoding': self.d.encoding}, 'd': self.d, - 'MY_CATE':fyclass, # 分类id - 'MY_FL':fl, # 筛选 - 'MY_PAGE':fypage, # 页数 - 'detailUrl':self.detailUrl or '', # 详情页链接 + 'MY_CATE': fyclass, # 分类id + 'MY_FL': fl, # 筛选 + 'MY_PAGE': fypage, # 页数 + 'detailUrl': self.detailUrl or '', # 详情页链接 'getParse': self.d.getParse, 'saveParse': self.d.saveParse, - 'jsp': jsp,'jq':jsp, 'setDetail': setDetail, + 'jsp': jsp, 'jq': jsp, 'setDetail': setDetail, }) ctx = py_ctx # print(ctx) @@ -836,7 +879,7 @@ class CMS: items = [] try: - r = requests.get(url, headers=self.headers, timeout=self.timeout,verify=False) + r = requests.get(url, headers=self.headers, timeout=self.timeout, verify=False) html = self.checkHtml(r) print(self.headers) # print(html) @@ -849,7 +892,7 @@ class CMS: # print(html) # with open('1.html',mode='w+',encoding='utf-8') as f: # f.write(html) - items = pdfa(html,p[0].replace('json:','',1)) + items = pdfa(html, p[0].replace('json:', '', 1)) except: pass # print(items) @@ -863,9 +906,9 @@ class CMS: link = '$'.join(links) content = '' if len(p) < 6 else pdfh(item, p[5]) # sid = self.regStr(sid, "/video/(\\S+).html") - vod_id = f'{fyclass}${link}' if self.detailUrl else link # 分类,播放链接 + vod_id = f'{fyclass}${link}' if self.detailUrl else link # 分类,播放链接 if self.二级 == '*': - vod_id = vod_id+'@@'+title+'@@'+img + vod_id = vod_id + '@@' + title + '@@' + img videos.append({ "vod_id": vod_id, @@ -880,43 +923,44 @@ class CMS: if self.图片来源: for video in videos: - if video.get('vod_pic','') and str(video['vod_pic']).startswith('http'): + if video.get('vod_pic', '') and str(video['vod_pic']).startswith('http'): video['vod_pic'] = f"{video['vod_pic']}{self.图片来源}" - print('videos:',videos) + print('videos:', videos) limit = 40 cnt = 9999 if len(videos) > 0 else 0 pagecount = 0 - if self.pagecount and isinstance(self.pagecount,dict) and fyclass in self.pagecount: + if self.pagecount and isinstance(self.pagecount, dict) and fyclass in self.pagecount: print(f'fyclass:{fyclass},self.pagecount:{self.pagecount}') pagecount = int(self.pagecount[fyclass]) result['list'] = videos result['page'] = fypage - result['pagecount'] = pagecount or max(cnt,fypage) + result['pagecount'] = pagecount or max(cnt, fypage) result['limit'] = limit result['total'] = cnt # print(result) # print(result['pagecount']) - logger.info(f'{self.getName()}获取分类{fyclass}第{fypage}页耗时:{get_interval(t1)}毫秒,共计{round(len(str(result)) / 1000, 2)} kb') + logger.info( + f'{self.getName()}获取分类{fyclass}第{fypage}页耗时:{get_interval(t1)}毫秒,共计{round(len(str(result)) / 1000, 2)} kb') nodata = { 'list': [{'vod_name': '无数据,防无限请求', 'vod_id': 'no_data', 'vod_remarks': '不要点,会崩的', - 'vod_pic': 'https://ghproxy.net/https://raw.githubusercontent.com/hjdhnx/dr_py/main/404.jpg'}], + 'vod_pic': 'https://ghproxy.net/https://raw.githubusercontent.com/hjdhnx/dr_py/main/404.jpg'}], 'total': 1, 'pagecount': 1, 'page': 1, 'limit': 1 } # return result return result if len(result['list']) > 0 else nodata - def 二级渲染(self,parse_str:'str|dict',**kwargs): + def 二级渲染(self, parse_str: 'str|dict', **kwargs): # *args是不定长参数 列表 # ** args是不定长参数字典 p = parse_str # 二级传递解析表达式 js的obj json对象 - detailUrl = kwargs.get('detailUrl','') # 不定长字典传递的二级详情页vod_id详情处理数据 - orId = kwargs.get('orId','') # 不定长字典传递的二级详情页vod_id原始数据 - url = kwargs.get('url','') # 不定长字典传递的二级详情页链接智能拼接数据 - vod = kwargs.get('vod',self.blank_vod()) # 最终要返回的二级详情页数据 默认空 - html = kwargs.get('html','') # 不定长字典传递的源码(如果不传才会在下面程序中去获取) - show_name = kwargs.get('show_name','') # 是否显示来源(用于drpy区分) - jsp = kwargs.get('jsp','') # jsp = jsoup(self.url) 传递的jsp解析 - fyclass = kwargs.get('fyclass','') # 二级传递的分类名称,可以得知进去的类别 + detailUrl = kwargs.get('detailUrl', '') # 不定长字典传递的二级详情页vod_id详情处理数据 + orId = kwargs.get('orId', '') # 不定长字典传递的二级详情页vod_id原始数据 + url = kwargs.get('url', '') # 不定长字典传递的二级详情页链接智能拼接数据 + vod = kwargs.get('vod', self.blank_vod()) # 最终要返回的二级详情页数据 默认空 + html = kwargs.get('html', '') # 不定长字典传递的源码(如果不传才会在下面程序中去获取) + show_name = kwargs.get('show_name', '') # 是否显示来源(用于drpy区分) + jsp = kwargs.get('jsp', '') # jsp = jsoup(self.url) 传递的jsp解析 + fyclass = kwargs.get('fyclass', '') # 二级传递的分类名称,可以得知进去的类别 play_url = self.play_url vod_name = '片名' vod_pic = '' @@ -938,7 +982,8 @@ class CMS: vod['vod_pic'] = vod_pic vod['vod_play_url'] = '嗅探播放$' + play_url + url.split('@@')[0] - elif not p or (not isinstance(p, dict) and not isinstance(p, str)) or (isinstance(p, str) and not str(p).startswith('js:')): + elif not p or (not isinstance(p, dict) and not isinstance(p, str)) or ( + isinstance(p, str) and not str(p).startswith('js:')): pass else: is_json = p.get('is_json', False) if isinstance(p, dict) else False # 二级里加is_json参数 @@ -947,8 +992,8 @@ class CMS: pd = jsp.pj if is_json else jsp.pd pq = jsp.pq vod['vod_id'] = orId - if not html: # 没传递html参数接detailUrl下来智能获取 - r = requests.get(url, headers=self.headers, timeout=self.timeout,verify=False) + if not html: # 没传递html参数接detailUrl下来智能获取 + r = requests.get(url, headers=self.headers, timeout=self.timeout, verify=False) html = self.checkHtml(r) if is_json: html = self.dealJson(html) @@ -958,7 +1003,7 @@ class CMS: if p.get('title'): p1 = p['title'].split(';') vod['vod_name'] = pdfh(html, p1[0]).replace('\n', ' ').strip() - vod['type_name'] = pdfh(html, p1[1]).replace('\n',' ').strip() if len(p1)>1 else '' + vod['type_name'] = pdfh(html, p1[1]).replace('\n', ' ').strip() if len(p1) > 1 else '' if p.get('desc'): try: p1 = p['desc'].split(';') @@ -988,7 +1033,8 @@ class CMS: vod_play_from = '$$$' playFrom = [] - init_flag = {'ctx':False} + init_flag = {'ctx': False} + def js_pre(): headers['Referer'] = getHome(url) py_ctx.update({ @@ -1001,9 +1047,10 @@ class CMS: 'd': self.d, 'getParse': self.d.getParse, 'saveParse': self.d.saveParse, - 'jsp': jsp,'jq':jsp, 'setDetail': setDetail,'play_url':play_url + 'jsp': jsp, 'jq': jsp, 'setDetail': setDetail, 'play_url': play_url }) init_flag['ctx'] = True + if p.get('重定向') and str(p['重定向']).startswith('js:'): if not init_flag['ctx']: js_pre() @@ -1047,13 +1094,13 @@ class CMS: # print(str(etree.tostring(vHeader[0], pretty_print=True), 'utf-8')) from lxml.html import tostring as html2str # print(html2str(vHeader[0].root).decode('utf-8')) - tab_text = p.get('tab_text','') or 'body&&Text' + tab_text = p.get('tab_text', '') or 'body&&Text' # print('tab_text:'+tab_text) if not is_json: for v in vHeader: # 过滤排除掉线路标题 # v_title = pq(v).text() - v_title = pdfh(v,tab_text).strip() + v_title = pdfh(v, tab_text).strip() # print(v_title) if self.tab_exclude and jsp.test(self.tab_exclude, v_title): continue @@ -1073,7 +1120,7 @@ class CMS: else: new_map[v] += 1 if new_map[v] > 1: - v = f'{v}{new_map[v]-1}' + v = f'{v}{new_map[v] - 1}' playFrom.append(v) vod_play_from = vod_play_from.join(playFrom) @@ -1084,7 +1131,7 @@ class CMS: if not init_flag['ctx']: js_pre() ctx = py_ctx - ctx['TABS'] = vodHeader # 把选集列表传过去 + ctx['TABS'] = vodHeader # 把选集列表传过去 rcode = p['lists'].replace('js:', '', 1) jscode = getPreJs() + rcode # print(jscode) @@ -1093,16 +1140,16 @@ class CMS: logger.info(f'开始执行lists代码:{rcode}') vlists = loader.eval('LISTS') if isinstance(vod, JsObjectWrapper): - vlists = vlists.to_list() # [['第1集$http://1.mp4','第2集$http://2.mp4'],['第3集$http://1.mp4','第4集$http://2.mp4']] + vlists = vlists.to_list() # [['第1集$http://1.mp4','第2集$http://2.mp4'],['第3集$http://1.mp4','第4集$http://2.mp4']] for i in range(len(vlists)): try: - vlists[i] = list(map(lambda x:'$'.join(x.split('$')[:2]),vlists[i])) + vlists[i] = list(map(lambda x: '$'.join(x.split('$')[:2]), vlists[i])) except Exception as e: logger.info(f'LISTS格式化发生错误:{e}') - vod_play_url = vod_play_url.join(list(map(lambda x:'#'.join(x),vlists))) + vod_play_url = vod_play_url.join(list(map(lambda x: '#'.join(x), vlists))) else: - list_text = p.get('list_text','') or 'body&&Text' - list_url = p.get('list_url','') or 'a&&href' + list_text = p.get('list_text', '') or 'body&&Text' + list_url = p.get('list_url', '') or 'a&&href' print('list_text:' + list_text) print('list_url:' + list_url) is_tab_js = p['tabs'].strip().startswith('js:') @@ -1118,35 +1165,38 @@ class CMS: # vodList = [pq(i).text()+'$'+pd(i,'a&&href') for i in vodList] # 拼接成 名称$链接 # pq(i).text() if self.play_parse: # 自动base64编码 - vodList = [(pdfh(html, tab_ext) if tab_ext else tab_name) + '$' + play_url + encodeUrl(i) for i + vodList = [(pdfh(html, tab_ext) if tab_ext else tab_name) + '$' + play_url + encodeUrl(i) + for i in vodList] if is_json else \ - [pdfh(i,list_text) + '$' + play_url + encodeUrl(pd(i, list_url)) for i in vodList] # 拼接成 名称$链接 + [pdfh(i, list_text) + '$' + play_url + encodeUrl(pd(i, list_url)) for i in + vodList] # 拼接成 名称$链接 else: vodList = [(pdfh(html, tab_ext) if tab_ext else tab_name) + '$' + play_url + i for i in vodList] if is_json else \ - [pdfh(i,list_text) + '$' + play_url + pd(i, list_url) for i in vodList] # 拼接成 名称$链接 + [pdfh(i, list_text) + '$' + play_url + pd(i, list_url) for i in vodList] # 拼接成 名称$链接 # print(vodList) - vodList = forceOrder(vodList,option=lambda x:x.split('$')[0]) + vodList = forceOrder(vodList, option=lambda x: x.split('$')[0]) # print(vodList) vlist = '#'.join(vodList) # 拼多个选集 # print(vlist) vod_tab_list.append(vlist) vod_play_url = vod_play_url.join(vod_tab_list) - vod_play_url_str = vod_play_url[:min(len(vod_play_url),500)] + vod_play_url_str = vod_play_url[:min(len(vod_play_url), 500)] print(vod_play_url_str) vod['vod_play_from'] = vod_play_from # print(vod_play_from) vod['vod_play_url'] = vod_play_url - logger.info(f'{self.getName()}仅二级渲染{len(vod_play_url.split("$$$")[0].split("$"))}集耗时:{get_interval(tt1)}毫秒,共计{round(len(str(vod)) / 1000, 2)} kb') + logger.info( + f'{self.getName()}仅二级渲染{len(vod_play_url.split("$$$")[0].split("$"))}集耗时:{get_interval(tt1)}毫秒,共计{round(len(str(vod)) / 1000, 2)} kb') if show_name: vod['vod_content'] = f'({self.id}){vod.get("vod_content", "")}' return vod - def detailOneVod(self,id,fyclass='',show_name=False): + def detailOneVod(self, id, fyclass='', show_name=False): vod = self.blank_vod() orId = str(id) orUrl = orId @@ -1155,10 +1205,10 @@ class CMS: detailUrl = orId.split('@@')[0] # print(detailUrl) if not detailUrl.startswith('http') and not '/' in detailUrl: - url = self.detailUrl.replace('fyid', detailUrl).replace('fyclass',fyclass) + url = self.detailUrl.replace('fyid', detailUrl).replace('fyclass', fyclass) # print(url) elif '/' in detailUrl: - url = urljoin(self.homeUrl,detailUrl) + url = urljoin(self.homeUrl, detailUrl) else: url = detailUrl if self.二级访问前: @@ -1186,7 +1236,7 @@ class CMS: try: p = self.二级 # 解析 jsp = jsoup(url) if url.startswith('http') else jsoup(self.url) - is_js = isinstance(p,str) and str(p).startswith('js:') # 是js + is_js = isinstance(p, str) and str(p).startswith('js:') # 是js if is_js: headers['Referer'] = getHome(url) play_url = self.play_url @@ -1203,34 +1253,35 @@ class CMS: 'd': self.d, 'getParse': self.d.getParse, 'saveParse': self.d.saveParse, - 'jsp':jsp,'jq':jsp,'setDetail':setDetail,'play_url':play_url + 'jsp': jsp, 'jq': jsp, 'setDetail': setDetail, 'play_url': play_url }) ctx = py_ctx # print(ctx) - jscode = getPreJs() + p.replace('js:','',1) + jscode = getPreJs() + p.replace('js:', '', 1) # print(jscode) loader, _ = runJScode(jscode, ctx=ctx) # print(loader.toString()) vod = loader.eval('VOD') - if isinstance(vod,JsObjectWrapper): + if isinstance(vod, JsObjectWrapper): vod = vod.to_dict() if show_name: vod['vod_content'] = f'({self.id}){vod.get("vod_content", "")}' else: vod = self.blank_vod() else: - vod = self.二级渲染(p,detailUrl=detailUrl,orId=orUrl,url=url,vod=vod,show_name=show_name,jsp=jsp,fyclass=fyclass) + vod = self.二级渲染(p, detailUrl=detailUrl, orId=orUrl, url=url, vod=vod, show_name=show_name, jsp=jsp, + fyclass=fyclass) except Exception as e: logger.info(f'{self.getName()}获取单个详情页{detailUrl}出错{e}') if self.图片来源: - if vod.get('vod_pic','') and str(vod['vod_pic']).startswith('http'): + if vod.get('vod_pic', '') and str(vod['vod_pic']).startswith('http'): vod['vod_pic'] = f"{vod['vod_pic']}{self.图片来源}" - if not vod.get('vod_id') or ('$' in orUrl and vod['vod_id']!=orUrl): + if not vod.get('vod_id') or ('$' in orUrl and vod['vod_id'] != orUrl): vod['vod_id'] = orUrl # print(vod) return vod - def detailContent(self, fypage, array,show_name=False): + def detailContent(self, fypage, array, show_name=False): """ cms二级数据 :param array: @@ -1238,8 +1289,9 @@ class CMS: """ # print('进入二级') t1 = time() - array = array if len(array) <= self.limit else array[(fypage-1)*self.limit:min(self.limit*fypage,len(array))] - thread_pool = ThreadPoolExecutor(min(self.limit,len(array))) # 定义线程池来启动多线程执行此任务 + array = array if len(array) <= self.limit else array[ + (fypage - 1) * self.limit:min(self.limit * fypage, len(array))] + thread_pool = ThreadPoolExecutor(min(self.limit, len(array))) # 定义线程池来启动多线程执行此任务 obj_list = [] try: for vod_url in array: @@ -1249,14 +1301,15 @@ class CMS: tmp = vod_url.split('$') vod_class = tmp[0] vod_url = tmp[1] - obj = thread_pool.submit(self.detailOneVod, vod_url,vod_class,show_name) + obj = thread_pool.submit(self.detailOneVod, vod_url, vod_class, show_name) obj_list.append(obj) thread_pool.shutdown(wait=True) # 等待所有子线程并行完毕 vod_list = [obj.result() for obj in obj_list] result = { 'list': vod_list } - logger.info(f'{self.getName()}获取详情页耗时:{get_interval(t1)}毫秒,共计{round(len(str(result)) / 1000, 2)} kb') + logger.info( + f'{self.getName()}获取详情页耗时:{get_interval(t1)}毫秒,共计{round(len(str(result)) / 1000, 2)} kb') except Exception as e: result = { 'list': [] @@ -1265,18 +1318,18 @@ class CMS: # print(result) return result - def searchContent(self, key, fypage=1,show_name=False): + def searchContent(self, key, fypage=1, show_name=False): if self.search_encoding: if str(self.search_encoding).lower() != 'utf-8': - key = encodeStr(key,self.search_encoding) + key = encodeStr(key, self.search_encoding) elif self.encoding and str(self.encoding).startswith('gb'): # key = quote(key.encode('utf-8').decode('utf-8').encode(self.encoding,'ignore')) - key = encodeStr(key,self.encoding) + key = encodeStr(key, self.encoding) # print(key) pg = str(fypage) if not self.searchUrl: return self.blank() - url = self.searchUrl.replace('**', key).replace('fypage',pg) + url = self.searchUrl.replace('**', key).replace('fypage', pg) logger.info(f'{self.getName()}搜索链接:{url}') if not self.搜索: return self.blank() @@ -1293,6 +1346,7 @@ class CMS: return ps except: return '' + if is_js: headers['Referer'] = getHome(url) py_ctx.update({ @@ -1307,7 +1361,7 @@ class CMS: # 详情页链接 'getParse': self.d.getParse, 'saveParse': self.d.saveParse, - 'jsp': jsp,'jq':jsp, 'setDetail': setDetail, + 'jsp': jsp, 'jq': jsp, 'setDetail': setDetail, }) ctx = py_ctx # print(ctx) @@ -1330,11 +1384,11 @@ class CMS: pd = jsp.pj if is_json else jsp.pd pq = jsp.pq try: - req_method = url.split(';')[1].lower() if len(url.split(';'))>1 else 'get' + req_method = url.split(';')[1].lower() if len(url.split(';')) > 1 else 'get' if req_method == 'post': rurls = url.split(';')[0].split('#') rurl = rurls[0] - params = rurls[1] if len(rurls)>1 else '' + params = rurls[1] if len(rurls) > 1 else '' # params = quote(params) print(f'rurl:{rurl},params:{params}') new_dict = {} @@ -1346,13 +1400,13 @@ class CMS: data = new_dict # print(data) logger.info(self.headers) - r = requests.post(rurl, headers=self.headers,data=data, timeout=self.timeout, verify=False) + r = requests.post(rurl, headers=self.headers, data=data, timeout=self.timeout, verify=False) elif req_method == 'postjson': rurls = url.split(';')[0].split('#') rurl = rurls[0] params = rurls[1] if len(rurls) > 1 else '{}' headers_cp = self.headers.copy() - headers_cp.update({'Content-Type':'application/json'}) + headers_cp.update({'Content-Type': 'application/json'}) try: params = ujson.dumps(ujson.loads(params)) except: @@ -1362,15 +1416,15 @@ class CMS: logger.info(params) r = requests.post(rurl, headers=headers_cp, data=params, timeout=self.timeout, verify=False) else: - r = requests.get(url, headers=self.headers,timeout=self.timeout,verify=False) + r = requests.get(url, headers=self.headers, timeout=self.timeout, verify=False) html = self.checkHtml(r) if is_json: html = self.dealJson(html) html = json.loads(html) # if not is_json and html.find('输入验证码') > -1: - if not is_json and re.search('系统安全验证|输入验证码',html,re.M|re.S): - cookie = verifyCode(url,self.headers,self.timeout,self.retry_count,self.ocr_api) + if not is_json and re.search('系统安全验证|输入验证码', html, re.M | re.S): + cookie = verifyCode(url, self.headers, self.timeout, self.retry_count, self.ocr_api) # cookie = '' if not cookie: return { @@ -1378,15 +1432,15 @@ class CMS: } self.saveCookie(cookie) self.headers['cookie'] = cookie - r = requests.get(url, headers=self.headers, timeout=self.timeout,verify=False) + r = requests.get(url, headers=self.headers, timeout=self.timeout, verify=False) r.encoding = self.encoding html = r.text if not show_name and not str(html).find(key) > -1: logger.info('搜索结果源码未包含关键字,疑似搜索失败,正为您打印结果源码') print(html) - p0 = getPP(p,0,pp,0) - items = pdfa(html,p0.replace('json:','',1)) + p0 = getPP(p, 0, pp, 0) + items = pdfa(html, p0.replace('json:', '', 1)) # print(len(items),items) videos = [] p1 = getPP(p, 1, pp, 1) @@ -1425,7 +1479,7 @@ class CMS: "vod_name": title, "vod_pic": img, "vod_remarks": desc, - "vod_content": content, # 无用参数 + "vod_content": content, # 无用参数 }) except Exception as e: print(f'搜索列表解析发生错误:{e}') @@ -1435,19 +1489,19 @@ class CMS: logger.info(f'搜索{self.getName()}发生错误:{e}') if self.图片来源: for video in videos: - if video.get('vod_pic','') and str(video['vod_pic']).startswith('http'): + if video.get('vod_pic', '') and str(video['vod_pic']).startswith('http'): video['vod_pic'] = f"{video['vod_pic']}{self.图片来源}" if show_name and len(videos) > 0: for video in videos: - video['vod_name'] = self.id + ' '+video['vod_name'] + video['vod_name'] = self.id + ' ' + video['vod_name'] video['vod_rule'] = self.id - video['vod_id'] = video['vod_id'] +'#' + self.id + video['vod_id'] = video['vod_id'] + '#' + self.id result = { 'list': videos } return result - def playContent(self, play_url,jxs=None,flag=None): + def playContent(self, play_url, jxs=None, flag=None): # flag参数只有类型为4的时候才有,可以忽略 # logger.info('播放免嗅地址: ' + self.play_url) # 注意:全局flags里的视频没法执行免嗅代码,因为会自动拦截去调用解析: url=yoursite:5705/vod?play_url=xxxx @@ -1455,9 +1509,9 @@ class CMS: jxs = [] # print(play_url) - if play_url.find('http') == -1: # 字符串看起来被编码的 + if play_url.find('http') == -1: # 字符串看起来被编码的 try: - play_url = base64Decode(play_url) # 自动base64解码 + play_url = base64Decode(play_url) # 自动base64解码 except: pass # print(unquote(play_url)) @@ -1478,13 +1532,14 @@ class CMS: pos = pycode.find('def lazyParse') if pos < 0: return play_url - pyenv = safePython(self.lazy,pycode[pos:]) - lazy_url = pyenv.action_task_exec('lazyParse',[play_url,self.d]) + pyenv = safePython(self.lazy, pycode[pos:]) + lazy_url = pyenv.action_task_exec('lazyParse', [play_url, self.d]) logger.info(f'py免嗅耗时:{get_interval(t1)}毫秒,播放地址:{lazy_url}') - if isinstance(lazy_url,str) and lazy_url.startswith('http'): + if isinstance(lazy_url, str) and lazy_url.startswith('http'): play_url = lazy_url else: - jscode = str(self.lazy).strip().replace('js:', '', 1) if str(self.lazy).startswith('js:') else js_code + jscode = str(self.lazy).strip().replace('js:', '', 1) if str(self.lazy).startswith( + 'js:') else js_code jsp = jsoup(self.url) # jscode = f'var input={play_url};{jscode}' # print(jscode) @@ -1492,29 +1547,30 @@ class CMS: py_ctx.update({ 'input': play_url, 'oheaders': self.d.oheaders, - 'fetch_params':{'headers':self.headers,'timeout':self.d.timeout,'encoding':self.d.encoding}, + 'fetch_params': {'headers': self.headers, 'timeout': self.d.timeout, + 'encoding': self.d.encoding}, 'd': self.d, - 'jxs':jxs, - 'getParse':self.d.getParse, - 'saveParse':self.d.saveParse, + 'jxs': jxs, + 'getParse': self.d.getParse, + 'saveParse': self.d.saveParse, 'jsp': jsp, 'jq': jsp, 'pdfh': self.d.jsp.pdfh, - 'pdfa': self.d.jsp.pdfa, 'pd': self.d.jsp.pd,'play_url':self.play_url + 'pdfa': self.d.jsp.pdfa, 'pd': self.d.jsp.pd, 'play_url': self.play_url }) ctx = py_ctx # print(ctx) jscode = getPreJs() + jscode # print(jscode) - loader,_ = runJScode(jscode,ctx=ctx) + loader, _ = runJScode(jscode, ctx=ctx) # print(loader.toString()) play_url = loader.eval('input') - if isinstance(play_url,JsObjectWrapper): + if isinstance(play_url, JsObjectWrapper): play_url = play_url.to_dict() # print(type(play_url)) # print(play_url) logger.info(f'js免嗅耗时:{get_interval(t1)}毫秒,播放地址:{play_url}') - if not play_url and play_url!='' and play_url!={}: + if not play_url and play_url != '' and play_url != {}: play_url = origin_play_url # if play_url == {}: # play_url = None @@ -1527,11 +1583,11 @@ class CMS: if self.play_json: # 如果传了 play_json 参数并且是个大于0的列表的话 - if isinstance(self.play_json,list) and len(self.play_json) > 0: + if isinstance(self.play_json, list) and len(self.play_json) > 0: # 获取播放链接 - web_url = play_url if isinstance(play_url,str) else play_url.get('url') + web_url = play_url if isinstance(play_url, str) else play_url.get('url') for pjson in self.play_json: - if pjson.get('re') and (pjson['re']=='*' or re.search(pjson['re'],web_url,re.S|re.M)): + if pjson.get('re') and (pjson['re'] == '*' or re.search(pjson['re'], web_url, re.S | re.M)): if pjson.get('json') and isinstance(pjson['json'], dict): if isinstance(play_url, str): base_json = pjson['json'] @@ -1544,31 +1600,33 @@ class CMS: # 不管有没有效,匹配到了就跑??? (当然不行了,要不然写来干嘛) break - else: # 没有指定列表默认表示需要解析,解析播放 (如果不要解析,我想也是没人会去写这个参数) + else: # 没有指定列表默认表示需要解析,解析播放 (如果不要解析,我想也是没人会去写这个参数) base_json = { - 'jx':1, # 解析开 - 'parse':1, # 嗅探 关 pluto这个标识有问题 只好双1了 + 'jx': 1, # 解析开 + 'parse': 1, # 嗅探 关 pluto这个标识有问题 只好双1了 } - if isinstance(play_url,str): + if isinstance(play_url, str): base_json['url'] = play_url play_url = base_json - elif isinstance(play_url,dict): + elif isinstance(play_url, dict): play_url.update(base_json) logger.info(f'最终返回play_url:{play_url}') return play_url + if __name__ == '__main__': print(urljoin('https://api.web.360kan.com/v1/f', '//0img.hitv.com/preview/sp_images/2022/01/28/202201281528074643023.jpg')) # exit() from utils import parser + # js_path = f'js/玩偶姐姐.js' # js_path = f'js/555影视.js' with open('../js/模板.js', encoding='utf-8') as f: before = f.read().split('export')[0] js_path = f'js/360影视.js' - ctx, js_code = parser.runJs(js_path,before=before) + ctx, js_code = parser.runJs(js_path, before=before) ruleDict = ctx.rule.to_dict() # lazy = ctx.eval('lazy') # print(lazy) @@ -1584,4 +1642,4 @@ if __name__ == '__main__': # cms.categoryContent('dianying',1) # print(cms.detailContent(['67391'])) # print(cms.searchContent('斗罗大陆')) - print(cms.searchContent('独行月球')) \ No newline at end of file + print(cms.searchContent('独行月球')) diff --git a/doc/写源教程.md b/doc/写源教程.md index 2f806b9..318c45f 100644 --- a/doc/写源教程.md +++ b/doc/写源教程.md @@ -15,6 +15,7 @@ var rule = { 编码:'',//不填就默认utf-8 搜索编码:'',//不填则不编码,默认都是按utf-8.可优先于全局编码属性.比如网页源码编码是gbk,这里可以指定utf-8搜索独立编码。多数情况这个属性不填或者填写gbk应对特殊的网站搜索 host:'',//网页的域名根,包含http头如 https://www,baidu.com + hostJS:'print(HOST);let html=request(HOST,{headers:{"User-Agent":PC_UA}});let src = jsp.pdfh(html,"ul&&li&&a&&href");print(src);HOST=src.replace("/index.php","")',//网页域名根动态抓取js代码。通过HOST=赋值 homeUrl:'/latest/',//网站的首页链接,可以是完整路径或者相对路径,用于分类获取和推荐获取 fyclass是分类标签 fypage是页数 url:'/fyclass/fypage.html[/fyclass/]',//网站的分类页面链接 detailUrl:'https://yanetflix.com/voddetail/fyid.html',//非必填,二级详情拼接链接,感觉没啥卵用 diff --git a/doc/更新日志.md b/doc/更新日志.md index 57d01ee..3d1232f 100644 --- a/doc/更新日志.md +++ b/doc/更新日志.md @@ -1,3 +1,11 @@ +###### 2023/06/07 +- [X] 后台管理增加p2p播放器 +- [X] 3.9.43beta1 drpy新增hostJs属性,可以用js动态捕捉网页根地址,下面是一个示例 +对应8K影视.js +```javascript +hostJs:'print(HOST);let html=request(HOST,{headers:{"User-Agent":PC_UA}});let src = jsp.pdfh(html,"ul&&li&&a&&href");print(src);HOST=src.replace("/index.php","")' +``` + ###### 2023/06/02 - [X] 3.9.42beta22 后台管理增加两套通用在线播放器地址 diff --git a/js/8K影视.js b/js/8K影视.js index cc5fc88..aa2e2b5 100644 --- a/js/8K影视.js +++ b/js/8K影视.js @@ -6,6 +6,7 @@ var rule={ title:'8K影视', 模板:'mxone5', host:'http://www.8kvod.com', + hostJs:'print(HOST);let html=request(HOST,{headers:{"User-Agent":PC_UA}});let src = jsp.pdfh(html,"ul&&li&&a&&href");print(src);HOST=src.replace("/index.php","")', // host:'http://www.xn--45q.top', homeUrl:'/index.php', url:'/t/fyclass-fypage/', diff --git a/js/drpy.txt b/js/drpy.txt index 2d87f05..e3614d0 100644 --- a/js/drpy.txt +++ b/js/drpy.txt @@ -1 +1 @@ -{"showTime":89200000,"txt":"drpy 3.9.42beta25 -道长"} \ No newline at end of file +{"showTime":89200000,"txt":"drpy 3.9.43beta1 -道长"} \ No newline at end of file diff --git a/js/version.txt b/js/version.txt index 2c45056..9a453d7 100644 --- a/js/version.txt +++ b/js/version.txt @@ -1 +1 @@ -3.9.42beta27 \ No newline at end of file +3.9.43beta1 \ No newline at end of file diff --git a/libs/drpy.js b/libs/drpy.js index 3bf72e4..0e5545c 100644 --- a/libs/drpy.js +++ b/libs/drpy.js @@ -55,7 +55,7 @@ function pre(){ } let rule = {}; -const VERSION = 'drpy1 3.9.41beta14 20230428'; +const VERSION = 'drpy1 3.9.43beta1 20230607'; /** 已知问题记录 * 1.影魔的jinjia2引擎不支持 {{fl}}对象直接渲染 (有能力解决的话尽量解决下,支持对象直接渲染字符串转义,如果加了|safe就不转义)[影魔牛逼,最新的文件发现这问题已经解决了] * Array.prototype.append = Array.prototype.push; 这种js执行后有毛病,for in 循环列表会把属性给打印出来 (这个大毛病需要重点排除一下) diff --git a/libs/drpy2.js b/libs/drpy2.js index 12cd4a6..81f5c11 100644 --- a/libs/drpy2.js +++ b/libs/drpy2.js @@ -41,7 +41,7 @@ function pre(){ let rule = {}; let vercode = typeof(pdfl) ==='function'?'drpy2.1':'drpy2'; -const VERSION = vercode+' 3.9.41beta14 20230428'; +const VERSION = vercode+' 3.9.43beta1 20230607'; /** 已知问题记录 * 1.影魔的jinjia2引擎不支持 {{fl}}对象直接渲染 (有能力解决的话尽量解决下,支持对象直接渲染字符串转义,如果加了|safe就不转义)[影魔牛逼,最新的文件发现这问题已经解决了] * Array.prototype.append = Array.prototype.push; 这种js执行后有毛病,for in 循环列表会把属性给打印出来 (这个大毛病需要重点排除一下) diff --git a/libs/pre.js b/libs/pre.js index 3180124..fb957f3 100644 --- a/libs/pre.js +++ b/libs/pre.js @@ -229,7 +229,10 @@ function request(url,obj){ }else{ new_obj = obj||{} } - if(!new_obj||!new_obj.headers||(!new_obj.headers['User-Agent']&&!new_obj.headers['user-agent'])){ + if(!new_obj||!new_obj.headers){ + new_obj.headers = {}; + } + if(!new_obj.headers['User-Agent']&&!new_obj.headers['user-agent']){ new_obj.headers['User-Agent'] = MOBILE_UA; } // delete new_obj.headers['Referer'];