import requests
from configparser import ConfigParser
import os

# Load the proxy configuration. On first run create a default proxy.ini and
# then READ it back — the original wrote the file but never read it, so the
# config['proxy']['proxy'] lookup below crashed with KeyError on first run.
config = ConfigParser()
if not os.path.exists('proxy.ini'):
    with open("proxy.ini", "wt", encoding='UTF-8') as code:
        print("[proxy]", file=code)
        print("proxy=127.0.0.1:1080", file=code)
config.read('proxy.ini', encoding='UTF-8')


def get_html(url):  # core web-request helper (网页请求核心)
    """Fetch *url* and return its body decoded as UTF-8.

    Uses the proxy configured in proxy.ini when one is set (the same address
    is applied to both http and https traffic). Returns None and prints a
    message when the request fails, matching the original silent-failure
    contract so callers that ignore errors keep working.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/68.0.3440.106 Safari/537.36'}
    proxy = str(config['proxy']['proxy'])
    # requests maps scheme -> proxy URL; an empty dict means a direct connection,
    # which collapses the original's duplicated proxy / no-proxy branches.
    proxies = {"http": "http://" + proxy, "https": "https://" + proxy} if proxy else {}
    try:
        getweb = requests.get(str(url), timeout=10, headers=headers, proxies=proxies)
        getweb.encoding = 'utf-8'
        return getweb.text
    except Exception:
        # The original swallowed proxy-branch errors silently (aaaa=''); keep a
        # visible hint but still fall through to an implicit None return.
        print("[-]Connect Failed! Please check your Proxy.")
def DownloadFileWithFilename(url, filename, path):
    """Download *url* into path/filename, honoring the proxy from proxy.ini.

    path is a folder relative to the project directory (e.g. the photo/video
    folder) and is created on demand. Returns "failed" when the download
    raises an IOError (requests' RequestException subclasses IOError); any
    other error aborts the whole program, as the original did.
    """
    config = ConfigParser()
    config.read('proxy.ini', encoding='UTF-8')
    proxy = str(config['proxy']['proxy'])
    # One request path for both cases: proxies=None means a direct connection,
    # removing the original's two copy-pasted proxy / no-proxy branches.
    proxies = {"http": "http://" + proxy, "https": "https://" + proxy} if proxy else None
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/68.0.3440.106 Safari/537.36'}
    try:
        if not os.path.exists(path):
            os.makedirs(path)
        r = requests.get(url, timeout=10, headers=headers, proxies=proxies)
        with open(str(path) + "/" + str(filename), "wb") as code:
            code.write(r.content)
    except IOError as e:
        print("[-]Movie not found in All website!")
        print("[-]" + str(filename), e)
        return "failed"
    except Exception as e1:
        print(e1)
        print("[-]Download Failed2!")
        time.sleep(3)
        os._exit(0)
def CreatFailedFolder():
    """Ensure the 'failed/' folder exists; abort the program if it cannot be made.

    Uses exist_ok=True so the check-then-create race of the original
    (os.path.exists followed by os.makedirs) cannot raise spuriously, and
    narrows the original bare ``except:`` to OSError so KeyboardInterrupt /
    SystemExit are no longer swallowed.
    """
    try:
        os.makedirs('failed/', exist_ok=True)
    except OSError:
        # Typically a permissions problem — same message and hard exit as before.
        print("[-]failed!can not be make folder 'failed'\n[-](Please run as Administrator)")
        os._exit(0)
file_number2).group() + '-')) + if not re.search('\w-', file_number).group() == 'None': + file_number = re.search('\w+-\w+', filename).group() + #上面是插入减号-到番号中 + print("[!]Making Data for [" + filename + "],the number is [" + file_number + "]") + # ====番号获取主程序=结束=== + except Exception as e: #番号提取异常 + print('[-]'+str(os.path.basename(filepath))+' Cannot catch the number :') + print('[-]' + str(os.path.basename(filepath)) + ' :', e) + print('[-]Move ' + os.path.basename(filepath) + ' to failed folder') + shutil.move(filepath, str(os.getcwd()) + '/' + 'failed/') + os._exit(0) + except IOError as e2: + print('[-]' + str(os.path.basename(filepath)) + ' Cannot catch the number :') + print('[-]' + str(os.path.basename(filepath)) + ' :',e2) + print('[-]Move ' + os.path.basename(filepath) + ' to failed folder') + shutil.move(filepath, str(os.getcwd()) + '/' + 'failed/') + os._exit(0) try: - -#================================================网站规则添加开始================================================ - +# ================================================网站规则添加开始================================================ try: #添加 需要 正则表达式的规则 + #=======================javbus.py======================= if re.search('^\d{5,}', file_number).group() in filename: json_data = json.loads(javbus.main_uncensored(file_number)) except: #添加 无需 正则表达式的规则 + # ====================fc2fans_club.py=================== if 'fc2' in filename: json_data = json.loads(fc2fans_club.main(file_number)) elif 'FC2' in filename: json_data = json.loads(fc2fans_club.main(file_number)) + + #========================siro.py======================== elif 'siro' in filename: json_data = json.loads(siro.main(file_number)) elif 'SIRO' in filename: @@ -137,38 +211,53 @@ def getNumberFromFilename(filepath): json_data = json.loads(siro.main(file_number)) elif '259LUXU' in filename: json_data = json.loads(siro.main(file_number)) + elif '300MAAN' in filename: + json_data = json.loads(siro.main(file_number)) + elif '300maan' in filename: + 
json_data = json.loads(siro.main(file_number)) + elif '326SCP' in filename: + json_data = json.loads(siro.main(file_number)) + elif '326scp' in filename: + json_data = json.loads(siro.main(file_number)) + elif '326URF' in filename: + json_data = json.loads(siro.main(file_number)) + elif '326urf' in filename: + json_data = json.loads(siro.main(file_number)) + + #=======================javbus.py======================= else: json_data = json.loads(javbus.main(file_number)) + #================================================网站规则添加结束================================================ - - - title = json_data['title'] - studio = json_data['studio'] - year = json_data['year'] - outline = json_data['outline'] - runtime = json_data['runtime'] + title = json_data['title'] + studio = json_data['studio'] + year = json_data['year'] + outline = json_data['outline'] + runtime = json_data['runtime'] director = json_data['director'] - actor = str(json_data['actor']).strip("[ ]").replace("'",'').replace(" ",'').split(',') - release = json_data['release'] - number = json_data['number'] - cover = json_data['cover'] + actor = str(json_data['actor']).strip("[ ]").replace("'",'').replace(" ",'').split(',') #字符串转列表 + release = json_data['release'] + number = json_data['number'] + cover = json_data['cover'] imagecut = json_data['imagecut'] - tag = str(json_data['tag']).strip("[ ]").replace("'",'').replace(" ",'').split(',') - except: - print('[-]File '+filename+'`s number can not be caught') + tag = str(json_data['tag']).strip("[ ]").replace("'",'').replace(" ",'').split(',') #字符串转列表 + + + except IOError as e: + print('[-]'+str(e)) + print('[-]Move ' + filename + ' to failed folder') + shutil.move(filepath, str(os.getcwd())+'/'+'failed/') + os._exit(0) + + except Exception as e: + print('[-]'+str(e)) print('[-]Move ' + filename + ' to failed folder') - if not os.path.exists('failed/'): # 新建failed文件夹 - os.makedirs('failed/') - if not os.path.exists('failed/'): - print("[-]failed!Dirs can not be 
make (Please run as Administrator)") - time.sleep(3) - os._exit(0) shutil.move(filepath, str(os.getcwd())+'/'+'failed/') os._exit(0) @@ -177,11 +266,6 @@ path = '' #设置path为全局变量,后面移动文件要用 def creatFolder(): actor2 = str(actor).strip("[ ]").replace("'",'').replace(" ",'') global path - if not os.path.exists('failed/'): #新建failed文件夹 - os.makedirs('failed/') - if not os.path.exists('failed/'): - print("[-]failed!Dirs can not be make (Please run as Administrator)") - os._exit(0) if len(actor2) > 240: #新建成功输出文件夹 path = 'JAV_output' + '/' + '超多人' + '/' + number #path为影片+元数据所在目录 else: @@ -220,6 +304,7 @@ def pasteFileToFolder(filepath, path): #文件路径,番号,后缀,要移 if __name__ == '__main__': filepath=argparse_get_file() #影片的路径 + CreatFailedFolder() getNumberFromFilename(filepath) #定义番号 creatFolder() #创建文件夹 imageDownload(filepath) #creatFoder会返回番号路径 diff --git a/javbus.py b/javbus.py index fc68aef..2b8744b 100644 --- a/javbus.py +++ b/javbus.py @@ -12,7 +12,7 @@ import json def get_html(url):#网页请求核心 headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'} - getweb = requests.get(str(url),timeout=5,headers=headers).text + getweb = requests.get(str(url),timeout=10,headers=headers).text try: return getweb except: @@ -97,9 +97,6 @@ def main(number): def main_uncensored(number): htmlcode = get_html('https://www.javbus.com/' + number) - dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", '')) - #print('un') - #print('https://www.javbus.com/' + number) dic = { 'title': getTitle(htmlcode), 'studio': getStudio(htmlcode), @@ -116,13 +113,10 @@ def main_uncensored(number): } js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8') - if getYear(htmlcode) == '': - #print('un2') + if getYear(htmlcode) == '' or getYear(htmlcode) == 'null': number2 = number.replace('-', '_') htmlcode = 
get_html('https://www.javbus.com/' + number2) - #print('https://www.javbus.com/' + number2) - dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number2.replace("_", '')) - dic = { + dic2 = { 'title': getTitle(htmlcode), 'studio': getStudio(htmlcode), 'year': getYear(htmlcode), @@ -136,11 +130,10 @@ def main_uncensored(number): 'tag': getTag(htmlcode), 'imagecut': 0, } - js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8') - #print(js) - return js - else: - bbb='' + js2 = json.dumps(dic2, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8') + return js2 + + return js # def return1(): diff --git a/siro.py b/siro.py index 548d610..f7359ee 100644 --- a/siro.py +++ b/siro.py @@ -7,7 +7,7 @@ from bs4 import BeautifulSoup def get_html(url):#网页请求核心 headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'} cookies = {'adc':'1'} - getweb = requests.get(str(url),timeout=5,cookies=cookies,headers=headers).text + getweb = requests.get(str(url),timeout=10,cookies=cookies,headers=headers).text try: return getweb except: