diff --git a/ADC_function.py b/ADC_function.py index 0cc1fca..43da93c 100644 --- a/ADC_function.py +++ b/ADC_function.py @@ -5,17 +5,17 @@ import re from retrying import retry import sys -# content = open('proxy.ini').read() +# content = open('config.ini').read() # content = re.sub(r"\xfe\xff","", content) # content = re.sub(r"\xff\xfe","", content) # content = re.sub(r"\xef\xbb\xbf","", content) # open('BaseConfig.cfg', 'w').write(content) config = RawConfigParser() -if os.path.exists('proxy.ini'): - config.read('proxy.ini', encoding='UTF-8') +if os.path.exists('config.ini'): + config.read('config.ini', encoding='UTF-8') else: - with open("proxy.ini", "wt", encoding='UTF-8') as code: + with open("config.ini", "wt", encoding='UTF-8') as code: print("[proxy]",file=code) print("proxy=127.0.0.1:1080",file=code) print("timeout=10", file=code) diff --git a/AV_Data_Capture.py b/AV_Data_Capture.py index 182972c..b7bb698 100644 --- a/AV_Data_Capture.py +++ b/AV_Data_Capture.py @@ -7,7 +7,7 @@ import ADC_function import json import shutil -version='0.11.2' +version='0.11.4' os.chdir(os.getcwd()) def UpdateCheck(): @@ -102,12 +102,20 @@ def getNumber(filepath): print('[-]' + str(os.path.basename(filepath)) + ' Cannot catch the number :') print('[-]' + str(os.path.basename(filepath)) + ' :', e) print('[-]Move ' + os.path.basename(filepath) + ' to failed folder') + shutil.move(filepath, str(os.getcwd()) + '/' + 'failed/') except IOError as e2: print('[-]' + str(os.path.basename(filepath)) + ' Cannot catch the number :') print('[-]' + str(os.path.basename(filepath)) + ' :', e2) print('[-]Move ' + os.path.basename(filepath) + ' to failed folder') shutil.move(filepath, str(os.getcwd()) + '/' + 'failed/') +def RunCore(): + if os.path.exists('core.py'): + os.system('python core.py' + ' "' + i + '" --number "'+getNumber(i)+'"') #选择从py文件启动 (用于源码py) + elif os.path.exists('core.exe'): + os.system('core.exe' + ' "' + i + '" --number "'+getNumber(i)+'"') #选择从exe文件启动(用于EXE版程序: + elif os.path.exists('core.py') and os.path.exists('core.exe'): + os.system('python core.py' + ' "' + i + '" --number "' + getNumber(i) + '"') # 选择从py文件启动 (用于源码py) if __name__ =='__main__': print('[*]===========AV Data Capture===========') @@ -124,11 +132,9 @@ if __name__ =='__main__': percentage = str(count/int(count_all)*100)[:4]+'%' print('[!] - '+percentage+' ['+str(count)+'/'+count_all+'] -') print("[!]Making Data for [" + i + "],the number is [" + getNumber(i) + "]") - os.system('python core.py' + ' "' + i + '" --number "'+getNumber(i)+'"') #选择从py文件启动 (用于源码py) - #os.system('core.exe' + ' "' + i + '" --number "'+getNumber(i)+'"') #选择从exe文件启动(用于EXE版程序 - #print() + RunCore() print("[*]=====================================") CEF('JAV_output') print("[+]All finished!!!") - input("[+][+]Press enter key exit, you can check the error messge before you exit.\n[+][+]按回车键结束,你可以在结束之前查看和错误信息。") + input("[+][+]Press enter key exit, you can check the error messge before you exit.\n[+][+]按回车键结束,你可以在结束之前查看和错误信息。") \ No newline at end of file diff --git a/config.ini b/config.ini new file mode 100644 index 0000000..c6ff3ef --- /dev/null +++ b/config.ini @@ -0,0 +1,12 @@ +[proxy] +proxy=127.0.0.1:1080 +timeout=10 +retry=3 + +[Name_Rule] +location_rule='JAV_output/'+actor+'/'+number +naming_rule=number+'-'+title + +[update] +update_check=0 +#on=1,off=0 \ No newline at end of file diff --git a/core.py b/core.py index 96c20af..c099774 100644 --- a/core.py +++ b/core.py @@ -30,6 +30,9 @@ tag=[] cn_sub='' path='' houzhui='' +website='' +json_data={} +actor_photo={} naming_rule =''#eval(config['Name_Rule']['naming_rule']) location_rule=''#eval(config['Name_Rule']['location_rule']) @@ -63,6 +66,8 @@ def getDataFromJSON(file_number): #从JSON返回元数据 global tag global image_main global cn_sub + global website + global actor_photo global naming_rule global location_rule @@ -70,7 +75,7 @@ def getDataFromJSON(file_number): #从JSON返回元数据 try: # 添加 需要 正则表达式的规则 # =======================javdb.py======================= if re.search('^\d{5,}', file_number).group() in file_number: - json_data = json.loads(javdb.main(file_number)) + json_data = json.loads(javbus.main_uncensored(file_number)) except: # 添加 无需 正则表达式的规则 # ====================fc2fans_club.py==================== if 'fc2' in file_number: @@ -99,6 +104,8 @@ def getDataFromJSON(file_number): #从JSON返回元数据 imagecut = json_data['imagecut'] tag = str(json_data['tag']).strip("[ ]").replace("'", '').replace(" ", '').split(',') # 字符串转列表 actor = str(actor_list).strip("[ ]").replace("'", '').replace(" ", '') + actor_photo = json_data['actor_photo'] + website = json_data['website'] # ====================处理异常字符====================== #\/:*?"<>| if '\\' in title: @@ -142,9 +149,9 @@ def creatFolder(): #创建文件夹 #=====================资源下载部分=========================== def DownloadFileWithFilename(url,filename,path): #path = examle:photo , video.in the Project Folder! config = ConfigParser() - config.read('proxy.ini', encoding='UTF-8') - proxy = str(config['proxy']['proxy']) - timeout = int(config['proxy']['timeout']) + config.read('config.ini', encoding='UTF-8') + proxy = str(config['proxy']['proxy']) + timeout = int(config['proxy']['timeout']) retry_count = int(config['proxy']['retry']) i = 0 @@ -184,19 +191,20 @@ def DownloadFileWithFilename(url,filename,path): #path = examle:photo , video.in print('[-]Image Download : Connect retry '+str(i)+'/'+str(retry_count)) def imageDownload(filepath): #封面是否下载成功,否则移动到failed global path - if DownloadFileWithFilename(cover,naming_rule+'.jpg', path) == 'failed': + if DownloadFileWithFilename(cover,'fanart.jpg', path) == 'failed': shutil.move(filepath, 'failed/') os._exit(0) - DownloadFileWithFilename(cover, naming_rule+'.jpg', path) - print('[+]Image Downloaded!', path +'/'+naming_rule+'.jpg') + DownloadFileWithFilename(cover, 'fanart.jpg', path) + print('[+]Image Downloaded!', path +'/fanart.jpg') def PrintFiles(filepath): #global path global title global cn_sub + global actor_photo try: if not os.path.exists(path): os.makedirs(path) - with open(path + "/" + naming_rule + ".nfo", "wt", encoding='UTF-8') as code: + with open(path + "/" + number + ".nfo", "wt", encoding='UTF-8') as code: print("", file=code) print(" " + naming_rule + "", file=code) print(" ", file=code) @@ -207,13 +215,15 @@ def PrintFiles(filepath): print(" "+outline+"", file=code) print(" "+str(runtime).replace(" ","")+"", file=code) print(" " + director + "", file=code) - print(" " + naming_rule + ".png", file=code) - print(" " + naming_rule + ".png", file=code) - print(" "+naming_rule + '.jpg'+"", file=code) + print(" poster.png", file=code) + print(" thumb.png", file=code) + print(" fanart.jpg", file=code) try: - for u in actor_list: + for key, value in actor_photo.items(): print(" ", file=code) - print(" " + u + "", file=code) + print(" " + key + "", file=code) + if not actor_photo == '': # or actor_photo == []: + print(" " + value + "", file=code) print(" ", file=code) except: aaaa='' @@ -237,9 +247,9 @@ def PrintFiles(filepath): print(" " + number + "", file=code) print(" " + release + "", file=code) print(" "+cover+"", file=code) - print(" " + "https://www.javbus.com/"+number + "", file=code) + print(" " + website + "", file=code) print("", file=code) - print("[+]Writeed! "+path + "/" + naming_rule + ".nfo") + print("[+]Writeed! "+path + "/" + number + ".nfo") except IOError as e: print("[-]Write Failed!") print(e) @@ -253,31 +263,32 @@ def PrintFiles(filepath): def cutImage(): if imagecut == 1: try: - img = Image.open(path + '/' + naming_rule + '.jpg') + img = Image.open(path + '/fanart.jpg') imgSize = img.size w = img.width h = img.height img2 = img.crop((w / 1.9, 0, w, h)) - img2.save(path + '/' + naming_rule + '.png') + img2.save(path + '/poster.png') except: print('[-]Cover cut failed!') else: - img = Image.open(path + '/' + naming_rule + '.jpg') + img = Image.open(path + '/fanart.jpg') w = img.width h = img.height - img.save(path + '/' + naming_rule + '.png') + img.save(path + '/poster.png') def pasteFileToFolder(filepath, path): #文件路径,番号,后缀,要移动至的位置 global houzhui houzhui = str(re.search('[.](AVI|RMVB|WMV|MOV|MP4|MKV|FLV|TS|avi|rmvb|wmv|mov|mp4|mkv|flv|ts)$', filepath).group()) - os.rename(filepath, naming_rule + houzhui) - shutil.move(naming_rule + houzhui, path) + os.rename(filepath, number + houzhui) + shutil.move(number + houzhui, path) def renameJpgToBackdrop_copy(): - shutil.copy(path+'/'+naming_rule + '.jpg', path+'/Backdrop.jpg') + shutil.copy(path+'/fanart.jpg', path+'/Backdrop.jpg') + shutil.copy(path + '/poster.png', path + '/thumb.png') if __name__ == '__main__': filepath=argparse_get_file()[0] #影片的路径 - if '-c.' in filepath or '-C.' in filepath: + if '-c.' in filepath or '-C.' in filepath or '中文' in filepath or '字幕' in filepath: cn_sub='1' if argparse_get_file()[1] == '': #获取手动拉去影片获取的番号 diff --git a/fc2fans_club.py b/fc2fans_club.py index d966b49..9915a87 100644 --- a/fc2fans_club.py +++ b/fc2fans_club.py @@ -61,18 +61,20 @@ def main(number2): htmlcode2 = ADC_function.get_html('http://adult.contents.fc2.com/article_search.php?id='+str(number).lstrip("FC2-").lstrip("fc2-").lstrip("fc2_").lstrip("fc2-")+'&utm_source=aff_php&utm_medium=source_code&utm_campaign=from_aff_php') htmlcode = ADC_function.get_html('http://fc2fans.club/html/FC2-' + number + '.html') dic = { - 'title': getTitle(htmlcode), - 'studio': getStudio(htmlcode), + 'title': getTitle(htmlcode), + 'studio': getStudio(htmlcode), 'year': '',#str(re.search('\d{4}',getRelease(number)).group()), - 'outline': getOutline(htmlcode,number), - 'runtime': getYear(getRelease(htmlcode)), + 'outline': getOutline(htmlcode,number), + 'runtime': getYear(getRelease(htmlcode)), 'director': getStudio(htmlcode), - 'actor': getActor(htmlcode), - 'release': getRelease(number), - 'number': 'FC2-'+number, - 'cover': getCover(htmlcode,number,htmlcode2), + 'actor': getActor(htmlcode), + 'release': getRelease(number), + 'number': 'FC2-'+number, + 'cover': getCover(htmlcode,number,htmlcode2), 'imagecut': 0, - 'tag':getTag(htmlcode), + 'tag': getTag(htmlcode), + 'actor_photo':'', + 'website': 'http://fc2fans.club/html/FC2-' + number + '.html', } #print(getTitle(htmlcode)) #print(getNum(htmlcode)) diff --git a/javbus.py b/javbus.py index 4ec73f9..92c4fb4 100644 --- a/javbus.py +++ b/javbus.py @@ -13,6 +13,18 @@ from ADC_function import * import javdb import siro +def getActorPhoto(htmlcode): #//*[@id="star_qdt"]/li/a/img + soup = BeautifulSoup(htmlcode, 'lxml') + a = soup.find_all(attrs={'class': 'star-name'}) + d={} + for i in a: + l=i.a['href'] + t=i.get_text() + html = etree.fromstring(get_html(l), etree.HTMLParser()) + p=str(html.xpath('//*[@id="waterfall"]/div[1]/div/div[1]/img/@src')).strip(" ['']") + p2={t:p} + d.update(p2) + return d def getTitle(htmlcode): #获取标题 doc = pq(htmlcode) title=str(doc('div.container h3').text()).replace(' ','-') @@ -100,17 +112,18 @@ def main(number): 'imagecut': 1, 'tag': getTag(htmlcode), 'label': getSerise(htmlcode), + 'actor_photo': getActorPhoto(htmlcode), + 'website': 'https://www.javbus.com/' + number, } js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8') - if 'HEYZO' in number or 'heyzo' in number or 'Heyzo' in number: htmlcode = get_html('https://www.javbus.com/' + number) - dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", '')) + #dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", '')) dic = { 'title': str(re.sub('\w+-\d+-', '', getTitle(htmlcode))), 'studio': getStudio(htmlcode), 'year': getYear(htmlcode), - 'outline': getOutline(dww_htmlcode), + 'outline': '', 'runtime': getRuntime(htmlcode), 'director': getDirector(htmlcode), 'actor': getActor(htmlcode), @@ -120,6 +133,8 @@ def main(number): 'imagecut': 1, 'tag': getTag(htmlcode), 'label': getSerise(htmlcode), + 'actor_photo': getActorPhoto(htmlcode), + 'website': 'https://www.javbus.com/' + number, } js2 = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8') @@ -132,6 +147,9 @@ def main(number): def main_uncensored(number): htmlcode = get_html('https://www.javbus.com/' + number) dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", '')) + if getTitle(htmlcode) == '': + htmlcode = get_html('https://www.javbus.com/' + number.replace('-','_')) + dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", '')) dic = { 'title': str(re.sub('\w+-\d+-','',getTitle(htmlcode))).replace(getNum(htmlcode)+'-',''), 'studio': getStudio(htmlcode), @@ -146,6 +164,8 @@ def main_uncensored(number): 'tag': getTag(htmlcode), 'label': getSerise(htmlcode), 'imagecut': 0, + 'actor_photo': '', + 'website': 'https://www.javbus.com/' + number, } js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8') diff --git a/javdb.py b/javdb.py index 17023a3..3723117 100644 --- a/javdb.py +++ b/javdb.py @@ -56,6 +56,8 @@ def getTag(a): def getCover(htmlcode): html = etree.fromstring(htmlcode, etree.HTMLParser()) result = str(html.xpath('/html/body/section/div/div[2]/div[1]/a/img/@src')).strip(" ['']") + if result == '': + result = str(html.xpath('/html/body/section/div/div[3]/div[1]/a/img/@src')).strip(" ['']") return result def getDirector(a): html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() @@ -68,13 +70,13 @@ def getOutline(htmlcode): return result def main(number): try: - try: - a = get_html('https://javdb1.com/search?q=' + number + '&f=all') - html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() - except: - a = get_html('https://javdb1.com/search?q=' + number.replace('-', '_') + '&f=all') - html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() + a = get_html('https://javdb.com/search?q=' + number + '&f=all') + html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() result1 = str(html.xpath('//*[@id="videos"]/div/div/a/@href')).strip(" ['']") + if result1 == '': + a = get_html('https://javdb.com/search?q=' + number.replace('-', '_') + '&f=all') + html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() + result1 = str(html.xpath('//*[@id="videos"]/div/div/a/@href')).strip(" ['']") b = get_html('https://javdb1.com' + result1) soup = BeautifulSoup(b, 'lxml') @@ -95,17 +97,20 @@ def main(number): 'tag': getTag(a), 'label': getLabel(a), 'year': getYear(getRelease(a)), # str(re.search('\d{4}',getRelease(a)).group()), + 'actor_photo': '', + 'website': 'https://javdb1.com' + result1, } js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8') return js except: - try: - a = get_html('https://javdb.com/search?q=' + number + '&f=all') - html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() - except: + a = get_html('https://javdb.com/search?q=' + number + '&f=all') + html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() + result1 = str(html.xpath('//*[@id="videos"]/div/div/a/@href')).strip(" ['']") + if result1 == '': a = get_html('https://javdb.com/search?q=' + number.replace('-', '_') + '&f=all') html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() - result1 = str(html.xpath('//*[@id="videos"]/div/div/a/@href')).strip(" ['']") + result1 = str(html.xpath('//*[@id="videos"]/div/div/a/@href')).strip(" ['']") + b = get_html('https://javdb.com' + result1) soup = BeautifulSoup(b, 'lxml') @@ -126,6 +131,8 @@ def main(number): 'tag': getTag(a), 'label': getLabel(a), 'year': getYear(getRelease(a)), # str(re.search('\d{4}',getRelease(a)).group()), + 'actor_photo': '', + 'website':'https://javdb.com' + result1, } js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8') return js diff --git a/siro.py b/siro.py index c6eda94..2b144a5 100644 --- a/siro.py +++ b/siro.py @@ -95,6 +95,8 @@ def main(number2): 'tag': getTag(a), 'label':getLabel(a), 'year': getYear(getRelease(a)), # str(re.search('\d{4}',getRelease(a)).group()), + 'actor_photo': '', + 'website':'https://www.mgstage.com/product/product_detail/'+str(number)+'/', } js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'),)#.encode('UTF-8') return js