diff --git a/AV_Data_Capture.py b/AV_Data_Capture.py index d9d4531..e1b13bf 100755 --- a/AV_Data_Capture.py +++ b/AV_Data_Capture.py @@ -9,8 +9,8 @@ from ADC_function import * from core import * import json import shutil -import fnmatch from configparser import ConfigParser +import argparse def UpdateCheck(version): @@ -26,6 +26,14 @@ def UpdateCheck(version): else: print('[+]Update Check disabled!') +def argparse_get_file(): + parser = argparse.ArgumentParser() + parser.add_argument("file", default='',nargs='?', help="Write the file path on here") + args = parser.parse_args() + if args.file == '': + return '' + else: + return args.file def movie_lists(escape_folder): escape_folder = re.split('[,,]', escape_folder) @@ -67,8 +75,11 @@ def CEF(path): a = '' -def getNumber(filepath): - filepath = filepath.replace('.\\', '') +def getNumber(filepath,absolute_path = False): + if absolute_path == True: + filepath=filepath.replace('\\','/') + file_number = str(re.findall(r'(.+?)\.', str(re.search('([^<>/\\\\|:""\\*\\?]+)\\.\\w+$', filepath).group()))).strip("['']").replace('_', '-') + return file_number if '-' in filepath or '_' in filepath: # 普通提取番号 主要处理包含减号-和_的番号 filepath = filepath.replace("_", "-") filepath.strip('22-sht.me').strip('-HD').strip('-hd') @@ -79,14 +90,13 @@ def getNumber(filepath): return file_number else: # 提取不含减号-的番号,FANZA CID try: - return str( - re.findall(r'(.+?)\.', str(re.search('([^<>/\\\\|:""\\*\\?]+)\\.\\w+$', filepath).group()))).strip("['']").replace('_', '-') + return str(re.findall(r'(.+?)\.', str(re.search('([^<>/\\\\|:""\\*\\?]+)\\.\\w+$', filepath).group()))).strip("['']").replace('_', '-') except: return re.search(r'(.+?)\.', filepath)[0] if __name__ == '__main__': - version = '2.4' + version = '2.5' config_file = 'config.ini' config = ConfigParser() config.read(config_file, encoding='UTF-8') @@ -102,6 +112,19 @@ if __name__ == '__main__': os.chdir(os.getcwd()) movie_list = movie_lists(escape_folder) + #========== 野鸡番号拖动 ========== + number_argparse=argparse_get_file() + if not number_argparse == '': + print("[!]Making Data for [" + number_argparse + "], the number is [" + getNumber(number_argparse,absolute_path = True) + "]") + core_main(number_argparse, getNumber(number_argparse,absolute_path = True)) + print("[*]======================================================") + CEF(success_folder) + CEF(failed_folder) + print("[+]All finished!!!") + input("[+][+]Press enter key exit, you can check the error messge before you exit.") + os._exit(0) + # ========== 野鸡番号拖动 ========== + count = 0 count_all = str(len(movie_list)) print('[+]Find', count_all, 'movies') @@ -121,8 +144,13 @@ if __name__ == '__main__': print('[-]Link', i, 'to failed folder') os.symlink(i, str(os.getcwd()) + '/' + 'failed/') else: - print('[-]Move ' + i + ' to failed folder') - shutil.move(i, str(os.getcwd()) + '/' + 'failed/') + try: + print('[-]Move ' + i + ' to failed folder') + shutil.move(i, str(os.getcwd()) + '/' + 'failed/') + except FileExistsError: + print('[!]File exists in failed!') + except: + print('[+]skip') continue CEF(success_folder) diff --git a/avsox.py b/avsox.py index 16ab19d..67ee9bf 100644 --- a/avsox.py +++ b/avsox.py @@ -3,6 +3,9 @@ from lxml import etree import json from bs4 import BeautifulSoup from ADC_function import * +# import sys +# import io +# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True) def getActorPhoto(htmlcode): #//*[@id="star_qdt"]/li/a/img soup = BeautifulSoup(htmlcode, 'lxml') diff --git a/core.py b/core.py index 5fce2a0..c0c9d1c 100755 --- a/core.py +++ b/core.py @@ -85,6 +85,11 @@ def getDataFromJSON(file_number, filepath, failed_folder): # 从JSON返回元 actor_list = str(json_data['actor']).strip("[ ]").replace("'", '').split(',') # 字符串转列表 release = json_data['release'] number = json_data['number'] + studio = json_data['studio'] + source = json_data['source'] + runtime = json_data['runtime'] + outline = json_data['runtime'] + label = json_data['label'] try: cover_small = json_data['cover_small'] except: @@ -93,9 +98,11 @@ def getDataFromJSON(file_number, filepath, failed_folder): # 从JSON返回元 tag = str(json_data['tag']).strip("[ ]").replace("'", '').replace(" ", '').split(',') # 字符串转列表 @ actor = str(actor_list).strip("[ ]").replace("'", '').replace(" ", '') + if title == '' or number == '': print('[-]Movie Data not found!') moveFailedFolder(filepath, failed_folder) + return # if imagecut == '3': # DownloadFileWithFilename() @@ -255,12 +262,14 @@ def DownloadFileWithFilename(url, filename, path, Config, filepath, failed_folde print('[-]Image Download : Connect retry ' + str(i) + '/' + str(retry_count)) print('[-]Connect Failed! Please check your Proxy or Network!') moveFailedFolder(filepath, failed_folder) + return def imageDownload(option, cover, number, c_word, path, multi_part, Config, filepath, failed_folder): # 封面是否下载成功,否则移动到failed if option == 'emby': if DownloadFileWithFilename(cover, number + c_word + '.jpg', path, Config, filepath, failed_folder) == 'failed': moveFailedFolder(filepath, failed_folder) + return DownloadFileWithFilename(cover, number + c_word + '.jpg', path, Config, filepath, failed_folder) if not os.path.getsize(path + '/' + number + c_word + '.jpg') == 0: print('[+]Image Downloaded!', path + '/' + number + c_word + '.jpg') @@ -284,6 +293,7 @@ def imageDownload(option, cover, number, c_word, path, multi_part, Config, filep elif option == 'plex': if DownloadFileWithFilename(cover, 'fanart.jpg', path, Config, filepath, failed_folder) == 'failed': moveFailedFolder(filepath, failed_folder) + return DownloadFileWithFilename(cover, 'fanart.jpg', path, Config, filepath, failed_folder) if not os.path.getsize(path + '/fanart.jpg') == 0: print('[+]Image Downloaded!', path + '/fanart.jpg') @@ -304,6 +314,7 @@ def imageDownload(option, cover, number, c_word, path, multi_part, Config, filep elif option == 'kodi': if DownloadFileWithFilename(cover, number + c_word + '-fanart.jpg', path, Config, filepath, failed_folder) == 'failed': moveFailedFolder(filepath, failed_folder) + return DownloadFileWithFilename(cover, number + c_word + '-fanart.jpg', path, Config, filepath, failed_folder) if not os.path.getsize(path + '/' + number + c_word + '-fanart.jpg') == 0: print('[+]Image Downloaded!', path + '/' + number + c_word + '-fanart.jpg') @@ -472,10 +483,12 @@ def PrintFiles(option, path, c_word, naming_rule, part, cn_sub, json_data, filep print("[-]Write Failed!") print(e) moveFailedFolder(filepath, failed_folder) + return except Exception as e1: print(e1) print("[-]Write Failed!") moveFailedFolder(filepath, failed_folder) + return def cutImage(option, imagecut, path, number, c_word): @@ -606,6 +619,7 @@ def get_part(filepath, failed_folder): except: print("[-]failed!Please rename the filename again!") moveFailedFolder(filepath, failed_folder) + return def debug_mode(json_data): diff --git a/fanza.py b/fanza.py index 47872fd..36e87ee 100644 --- a/fanza.py +++ b/fanza.py @@ -57,10 +57,10 @@ def getRelease(a): def getTag(a): html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() try: - result1 = str(html.xpath("//td[contains(text(),'ジャンル:')]/following-sibling::td/a/text()")).strip(" ['']") + result1 = html.xpath("//td[contains(text(),'ジャンル:')]/following-sibling::td/a/text()") except: - result1 = str(html.xpath("//td[contains(text(),'ジャンル:')]/following-sibling::td/text()")).strip(" ['']") - return result1.replace("', '",",") + result1 = html.xpath("//td[contains(text(),'ジャンル:')]/following-sibling::td/text()") + return result1 def getCover(htmlcode,number): html = etree.fromstring(htmlcode, etree.HTMLParser()) result = html.xpath('//*[@id="'+number+'"]/@href')[0] @@ -110,4 +110,4 @@ def main(number): # main('DV-1562') # input("[+][+]Press enter key exit, you can check the error messge before you exit.\n[+][+]按回车键结束,你可以在结束之前查看和错误信息。") -#print(main('n0635')) +#print(main('ipx292')) diff --git a/fc2fans_club.py b/fc2fans_club.py index 72cb773..c44c905 100755 --- a/fc2fans_club.py +++ b/fc2fans_club.py @@ -2,6 +2,9 @@ import re from lxml import etree#need install import json import ADC_function +# import sys +# import io +# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True) def getTitle(htmlcode): #获取厂商 #print(htmlcode) @@ -55,8 +58,53 @@ def getYear(release): except: return '' +def getTitle_fc2com(htmlcode): #获取厂商 + html = etree.fromstring(htmlcode,etree.HTMLParser()) + result = html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/h3/text()')[0] + return result +def getActor_fc2com(htmlcode): + try: + html = etree.fromstring(htmlcode, etree.HTMLParser()) + result = html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/ul/li[3]/a/text()')[0] + return result + except: + return '' +def getStudio_fc2com(htmlcode): #获取厂商 + try: + html = etree.fromstring(htmlcode, etree.HTMLParser()) + result = str(html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/ul/li[3]/a/text()')).strip(" ['']") + return result + except: + return '' +def getNum_fc2com(htmlcode): #获取番号 + html = etree.fromstring(htmlcode, etree.HTMLParser()) + result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[1]/span[2]/text()')).strip(" ['']") + return result +def getRelease_fc2com(htmlcode2): # + html=etree.fromstring(htmlcode2,etree.HTMLParser()) + result = str(html.xpath('//*[@id="container"]/div[1]/div/article/section[1]/div/div[2]/dl/dd[4]/text()')).strip(" ['']") + return result +def getCover_fc2com(htmlcode2): #获取厂商 # + html = etree.fromstring(htmlcode2, etree.HTMLParser()) + result = str(html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[1]/span/img/@src')).strip(" ['']") + return 'http:' + result +def getOutline_fc2com(htmlcode2): #获取番号 # + html = etree.fromstring(htmlcode2, etree.HTMLParser()) + result = str(html.xpath('/html/body/div/text()')).strip(" ['']").replace("\\n",'',10000).replace("'",'',10000).replace(', ,','').strip(' ').replace('。,',',') + return result +def getTag_fc2com(number): #获取番号 + htmlcode = str(bytes(ADC_function.get_html('http://adult.contents.fc2.com/api/v4/article/'+number+'/tag?'),'utf-8').decode('unicode-escape')) + result = re.findall('"tag":"(.*?)"', htmlcode) + return result +def getYear_fc2com(release): + try: + result = re.search('\d{4}',release).group() + return result + except: + return '' + def main(number): - htmlcode2 = ADC_function.get_html('http://adult.contents.fc2.com/article_search.php?id='+number+'&utm_source=aff_php&utm_medium=source_code&utm_campaign=from_aff_php') + htmlcode2 = ADC_function.get_html('https://adult.contents.fc2.com/article/'+number+'/') htmlcode = ADC_function.get_html('https://fc2club.com//html/FC2-' + number + '.html') actor = getActor(htmlcode) if getActor(htmlcode) == '': @@ -65,12 +113,13 @@ def main(number): 'title': getTitle(htmlcode), 'studio': getStudio(htmlcode), 'year': '',#str(re.search('\d{4}',getRelease(number)).group()), - 'outline': getOutline(htmlcode2), + 'outline': '',#getOutline(htmlcode2), 'runtime': getYear(getRelease(htmlcode)), 'director': getStudio(htmlcode), 'actor': actor, 'release': getRelease(number), 'number': 'FC2-'+number, + 'label': '', 'cover': getCover(htmlcode,number,htmlcode2), 'imagecut': 0, 'tag': getTag(htmlcode), @@ -78,7 +127,31 @@ def main(number): 'website': 'https://fc2club.com//html/FC2-' + number + '.html', 'source':'https://fc2club.com//html/FC2-' + number + '.html', } + if dic['title'] == '': + htmlcode2 = ADC_function.get_html('https://adult.contents.fc2.com/article/' + number + '/',cookies={'wei6H':'1'}) + actor = getActor(htmlcode) + if getActor(htmlcode) == '': + actor = 'FC2系列' + dic = { + 'title': getTitle_fc2com(htmlcode2), + 'studio': getStudio_fc2com(htmlcode2), + 'year': '', # str(re.search('\d{4}',getRelease(number)).group()), + 'outline': getOutline_fc2com(htmlcode2), + 'runtime': getYear_fc2com(getRelease(htmlcode2)), + 'director': getStudio_fc2com(htmlcode2), + 'actor': actor, + 'release': getRelease_fc2com(number), + 'number': 'FC2-' + number, + 'cover': getCover_fc2com(htmlcode2), + 'imagecut': 0, + 'tag': getTag_fc2com(number), + 'label': '', + 'actor_photo': '', + 'website': 'http://adult.contents.fc2.com/article/' + number + '/', + 'source': 'http://adult.contents.fc2.com/article/' + number + '/', + } js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'),)#.encode('UTF-8') return js -#print(main('1051725')) + +#print(main('1252953')) diff --git a/javdb.py b/javdb.py index 9bc67fd..4d713aa 100755 --- a/javdb.py +++ b/javdb.py @@ -8,12 +8,9 @@ from ADC_function import * # sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True) def getTitle(a): - try: - html = etree.fromstring(a, etree.HTMLParser()) - result = str(html.xpath('/html/body/section/div/h2/strong/text()')).strip(" ['']") - return re.sub('.*\] ', '', result.replace('/', ',').replace('\\xa0', '').replace(' : ', '')) - except: - return re.sub('.*\] ', '', result.replace('/', ',').replace('\\xa0', '')) + html = etree.fromstring(a, etree.HTMLParser()) + result = html.xpath("/html/body/section/div/h2/strong/text()")[0] + return result def getActor(a): # //*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text() html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() result1 = str(html.xpath('//strong[contains(text(),"演員")]/../following-sibling::span/text()')).strip(" ['']") @@ -83,71 +80,32 @@ def getOutline(htmlcode): return result def main(number): number = number.upper() - try: - a = get_html('https://javdb.com/search?q=' + number + '&f=all').replace(u'\xa0', u' ') - html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() - result1 = html.xpath('//*[@id="videos"]/div/div/a/@href')[0] - b = get_html('https://javdb.com' + result1).replace(u'\xa0', u' ') - dic = { - 'actor': getActor(b), - 'title': getTitle(b).replace("\\n", '').replace(' ', '').replace(getActor(a), '').replace(getNum(a), - '').replace( - '无码', '').replace('有码', '').lstrip(' ').replace(number,''), - 'studio': getStudio(b), - 'outline': getOutline(b), - 'runtime': getRuntime(b), - 'director': getDirector(b), - 'release': getRelease(b), - 'number': getNum(b), - 'cover': getCover(b), - 'cover_small': getCover_small(a), - 'imagecut': 3, - 'tag': getTag(b), - 'label': getLabel(b), - 'year': getYear(getRelease(b)), # str(re.search('\d{4}',getRelease(a)).group()), - 'actor_photo': getActorPhoto(getActor(b)), - 'website': 'https://javdb.com' + result1, - 'source': 'javdb.py', - } - if getNum(b) != number: # 与搜索到的番号不匹配 - dic['title'] = '' - dic['number'] = '' - js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8') - return js - except: - a = get_html('https://javdb.com/search?q=' + number + '&f=all').replace(u'\xa0', u' ') - html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() - result1 = html.xpath('//*[@id="videos"]/div/div/a/@href')[0] - print(html.xpath('//*[@id="videos"]/div/div/a/@href')) - b = get_html('https://javdb.com' + result1).replace(u'\xa0', u' ') - dic = { - 'actor': getActor(b), - 'title': getTitle(b).replace("\\n", '').replace(' ', '').replace(getActor(a), '').replace( - getNum(b), - '').replace( - '无码', '').replace('有码', '').lstrip(' ').replace(number,''), - 'studio': getStudio(b), - 'outline': getOutline(b), - 'runtime': getRuntime(b), - 'director': getDirector(b), - 'release': getRelease(b), - 'number': getNum(b), - 'cover': getCover(b), - 'cover_small': getCover_small(a), - 'imagecut': 3, - 'tag': getTag(b), - 'label': getLabel(b), - 'year': getYear(getRelease(b)), # str(re.search('\d{4}',getRelease(a)).group()), - 'actor_photo': getActorPhoto(getActor(b)), - 'website': 'https://javdb3.com' + result1, - 'source': 'javdb.py', - } - if getNum(b) != number: # 与搜索到的番号不匹配 - dic['title'] = '' - dic['number'] = '' - js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8') - return js + a = get_html('https://javdb.com/search?q=' + number + '&f=all') + html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() + result1 = html.xpath('//*[@id="videos"]/div/div/a/@href')[0] + b = get_html('https://javdb.com' + result1) + dic = { + 'actor': getActor(b), + 'title': getTitle(b), + 'studio': getStudio(b), + 'outline': getOutline(b), + 'runtime': getRuntime(b), + 'director': getDirector(b), + 'release': getRelease(b), + 'number': getNum(b), + 'cover': getCover(b), + 'cover_small': getCover_small(a), + 'imagecut': 3, + 'tag': getTag(b), + 'label': getLabel(b), + 'year': getYear(getRelease(b)), # str(re.search('\d{4}',getRelease(a)).group()), + 'actor_photo': getActorPhoto(getActor(b)), + 'website': 'https://javdb.com' + result1, + 'source': 'javdb.py', + } + js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8') + return js # main('DV-1562') # input("[+][+]Press enter key exit, you can check the error messge before you exit.\n[+][+]按回车键结束,你可以在结束之前查看和错误信息。") -# print(main('YMDD-178')) +#print(main('ipx-292')) diff --git a/siro.py b/siro.py index c16537f..1a3ab3d 100755 --- a/siro.py +++ b/siro.py @@ -3,6 +3,9 @@ from lxml import etree import json from bs4 import BeautifulSoup from ADC_function import * +# import sys +# import io +# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True) def getTitle(a): try: diff --git a/update_check.json b/update_check.json index f5d98ab..c671921 100644 --- a/update_check.json +++ b/update_check.json @@ -1,5 +1,5 @@ { - "version": "2.4", - "version_show":"2.4", + "version": "2.5", + "version_show":"2.5", "download": "https://github.com/yoshiko2/AV_Data_Capture/releases" }