diff --git a/ADC_function.py b/ADC_function.py index 1c679ed..0cc1fca 100644 --- a/ADC_function.py +++ b/ADC_function.py @@ -36,20 +36,14 @@ def get_html(url,cookies = None):#网页请求核心 while i < retry_count: try: if not str(config['proxy']['proxy']) == '': - proxies = { - "http": "http://" + str(config['proxy']['proxy']), - "https": "https://" + str(config['proxy']['proxy']) - } - headers = { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3100.0 Safari/537.36'} + proxies = {"http": "http://" + str(config['proxy']['proxy']),"https": "https://" + str(config['proxy']['proxy'])} + headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3100.0 Safari/537.36'} getweb = requests.get(str(url), headers=headers, timeout=int(config['proxy']['timeout']),proxies=proxies, cookies=cookies) getweb.encoding = 'utf-8' - # print(getweb.text) return getweb.text else: - headers = { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'} - getweb = requests.get(str(url), headers=headers,timeout=int(config['proxy']['timeout']), cookies=cookies) + headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'} + getweb = requests.get(str(url), headers=headers, timeout=int(config['proxy']['timeout']), cookies=cookies) getweb.encoding = 'utf-8' return getweb.text except requests.exceptions.RequestException: diff --git a/AV_Data_Capture.py b/AV_Data_Capture.py index dd0127b..afce144 100644 --- a/AV_Data_Capture.py +++ b/AV_Data_Capture.py @@ -76,8 +76,6 @@ def getNumber(filepath): filepath1.strip('22-sht.me').strip('-HD').strip('-hd') filename = str(re.sub("\[\d{4}-\d{1,2}-\d{1,2}\] - ", "", filepath1)) # 去除文件名中时间 file_number = re.search('\w+-\d+', filename).group() - if '-C.' in filepath or '-c.' in filepath: - cn_sub = '1' return file_number except: # 提取不含减号-的番号 try: # 提取东京热番号格式 n1087 @@ -127,7 +125,8 @@ if __name__ =='__main__': print('[!] - '+percentage+' ['+str(count)+'/'+count_all+'] -') print("[!]Making Data for [" + i + "],the number is [" + getNumber(i) + "]") os.system('python core.py' + ' "' + i + '" --number "'+getNumber(i)+'"') #选择从py文件启动 (用于源码py) - #os.system('core.exe' + ' "' + i + '" --number "'+getNumber(i)+'"') #选择从exe文件启动(用于EXE版程序) + #print('core.exe' + ' "' + i + '" --number "'+getNumber(i)+'"') #选择从exe文件启动(用于EXE版程序 + #print() print("[*]=====================================") CEF('JAV_output') diff --git a/core.py b/core.py index 722ec5c..96c20af 100644 --- a/core.py +++ b/core.py @@ -71,9 +71,6 @@ def getDataFromJSON(file_number): #从JSON返回元数据 # =======================javdb.py======================= if re.search('^\d{5,}', file_number).group() in file_number: json_data = json.loads(javdb.main(file_number)) - # ======================siro.py========================== - elif re.search('\d+\D+', file_number).group() in file_number: - json_data = json.loads(siro.main(file_number)) except: # 添加 无需 正则表达式的规则 # ====================fc2fans_club.py==================== if 'fc2' in file_number: @@ -95,7 +92,7 @@ def getDataFromJSON(file_number): #从JSON返回元数据 outline = json_data['outline'] runtime = json_data['runtime'] director = json_data['director'] - actor_list = str(json_data['actor']).strip("[ ]").replace("'", '').replace(" ", '').split(',') # 字符串转列表 + actor_list = str(json_data['actor']).strip("[ ]").replace("'", '').split(',') # 字符串转列表 release = json_data['release'] number = json_data['number'] cover = json_data['cover'] diff --git a/javbus.py b/javbus.py index 2f284f7..4ec73f9 100644 --- a/javbus.py +++ b/javbus.py @@ -11,6 +11,7 @@ import time import json from ADC_function import * import javdb +import siro def getTitle(htmlcode): #获取标题 doc = pq(htmlcode) @@ -75,6 +76,13 @@ def getTag(htmlcode): # 获取演员 def main(number): + try: + if re.search('\d+\D+', number).group() in number: + js = siro.main(number) + return js + except: + aaaa='' + try: htmlcode = get_html('https://www.javbus.com/' + number) dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", '')) diff --git a/proxy.ini b/proxy.ini index dd5c882..067c22c 100644 --- a/proxy.ini +++ b/proxy.ini @@ -4,7 +4,7 @@ timeout=10 retry=3 [Name_Rule] -location_rule='JAV_output/'+actor+'/['+number+']-'+title +location_rule='JAV_output/'+actor+'/'+number naming_rule=number+'-'+title [update] diff --git a/siro.py b/siro.py index ed8c84d..c6eda94 100644 --- a/siro.py +++ b/siro.py @@ -99,4 +99,4 @@ def main(number2): js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'),)#.encode('UTF-8') return js -#print(main('200GANA-1581')) \ No newline at end of file +#print(main('300maan-401')) \ No newline at end of file