diff --git a/AV_Data_Capture.py b/AV_Data_Capture.py
index 82adf91..4bb0c1a 100755
--- a/AV_Data_Capture.py
+++ b/AV_Data_Capture.py
@@ -14,7 +14,7 @@ os.chdir(os.getcwd())
 
 
 # ============global var===========
-version='2.2'
+version='2.3'
 
 config = ConfigParser()
 config.read(config_file, encoding='UTF-8')
@@ -23,20 +23,6 @@ Platform = sys.platform
 
 # ==========global var end=========
 
-def moveMovies():
-    movieFiles = []
-    fromPath = config['movie_location']['path']
-    if Platform == 'win32':
-        movieFormat = ["avi", "rmvb", "wmv", "mov", "mp4", "mkv", "flv", "ts"]
-    else:
-        movieFormat = ["AVI", "RMVB", "WMV", "MOV", "MP4", "MKV", "FLV", "TS","avi", "rmvb", "wmv", "mov", "mp4", "mkv", "flv", "ts"]
-    for fm in movieFormat:
-        movieFiles = movieFiles + [os.path.join(dirpath, f)
-                                   for dirpath, dirnames, files in os.walk(fromPath)
-                                   for f in fnmatch.filter(files, '*.' + fm)]
-    for movie in movieFiles:
-        print("Move file " + movie)
-        shutil.move(movie, os.path.curdir)
 def UpdateCheck():
     if UpdateCheckSwitch() == '1':
         html2 = get_html('https://raw.githubusercontent.com/yoshiko2/AV_Data_Capture/master/update_check.json')
@@ -53,11 +39,17 @@ def movie_lists():
     global exclude_directory_1
     global exclude_directory_2
     total=[]
-    file_type = ['mp4','avi','rmvb','wmv','mov','mkv','flv','ts']
+    file_type = ['.mp4','.avi','.rmvb','.wmv','.mov','.mkv','.flv','.ts','.MP4', '.AVI', '.RMVB', '.WMV', '.MOV', '.MKV', '.FLV', '.TS',]
     exclude_directory_1 = config['common']['failed_output_folder']
     exclude_directory_2 = config['common']['success_output_folder']
-    for a in file_type:
-        total += glob.glob(r"./*." + a)
+    file_root=os.getcwd()
+    for root,dirs,files in os.walk(file_root):
+        if exclude_directory_1 not in root and exclude_directory_2 not in root:
+            for f in files:
+                if os.path.splitext(f)[1] in file_type:
+                    path = os.path.join(root,f)
+                    path = path.replace(file_root,'.')
+                    total.append(path)
     return total
 def CreatFailedFolder():
     if not os.path.exists('failed/'): # 新建failed文件夹
@@ -86,7 +78,7 @@ def rreplace(self, old, new, *max):
     return new.join(self.rsplit(old, count))
 def getNumber(filepath):
     filepath = filepath.replace('.\\','')
-    try: # 普通提取番号 主要处理包含减号-的番号
+    if '-' in filepath or '_' in filepath: # 普通提取番号 主要处理包含减号-和_的番号
         filepath = filepath.replace("_", "-")
         filepath.strip('22-sht.me').strip('-HD').strip('-hd')
         filename = str(re.sub("\[\d{4}-\d{1,2}-\d{1,2}\] - ", "", filepath)) # 去除文件名中时间
@@ -97,18 +89,11 @@ def getNumber(filepath):
         except: # 提取类似mkbd-s120番号
             file_number = re.search('\w+-\w+\d+', filename).group()
             return file_number
-    except: # 提取不含减号-的番号
+    else: # 提取不含减号-的番号,FANZA CID
         try:
-            filename = str(re.sub("ts6\d", "", filepath)).strip('Tokyo-hot').strip('tokyo-hot')
-            filename = str(re.sub(".*?\.com-\d+", "", filename)).replace('_', '')
-            file_number = str(re.search('\w+\d{4}', filename).group(0))
-            return file_number
-        except: # 提取无减号番号
-            filename = str(re.sub("ts6\d", "", filepath)) # 去除ts64/265
-            filename = str(re.sub(".*?\.com-\d+", "", filename))
-            file_number = str(re.match('\w+', filename).group())
-            file_number = str(file_number.replace(re.match("^[A-Za-z]+", file_number).group(),re.match("^[A-Za-z]+", file_number).group() + '-'))
-            return file_number
+            return str(re.findall(r'(.+?)\.', str(re.search('([^<>/\\\\|:""\\*\\?]+)\\.\\w+$', filepath).group()))).strip("['']").replace('_', '-')
+        except:
+            return re.search(r'(.+?)\.',filepath)[0]
 
 def RunCore():
     if Platform == 'win32':
@@ -120,7 +105,10 @@ def RunCore():
             os.system('python core.py' + ' "' + i + '" --number "' + getNumber(i) + '"') # 从py文件启动(用于源码py)
         else:
             if os.path.exists('core.py'):
-                os.system('python3 core.py' + ' "' + i + '" --number "' + getNumber(i) + '"') # 从py文件启动(用于源码py)
+                try:
+                    os.system('python3 core.py' + ' "' + i + '" --number "' + getNumber(i) + '"') # 从py文件启动(用于源码py)
+                except:
+                    os.system('python core.py' + ' "' + i + '" --number "' + getNumber(i) + '"') # 从py文件启动(用于源码py)
             elif os.path.exists('core.exe'):
                 os.system('core.exe' + ' "' + i + '" --number "' + getNumber(i) + '"') # 从exe启动(用于EXE版程序)
             elif os.path.exists('core.py') and os.path.exists('core.exe'):
@@ -130,17 +118,18 @@ if __name__ =='__main__':
     print('[*]================== AV Data Capture ===================')
     print('[*] Version '+version)
     print('[*]======================================================')
+    CreatFailedFolder()
     UpdateCheck()
-    moveMovies()
     os.chdir(os.getcwd())
 
+    movie_list=movie_lists()
     count = 0
-    count_all = str(len(movie_lists()))
-    print('[+]Find',str(len(movie_lists())),'movies')
+    count_all = str(len(movie_list))
+    print('[+]Find',count_all,'movies')
 
     if config['common']['soft_link'] == '1':
         print('[!] --- Soft link mode is ENABLE! ----')
-    for i in movie_lists(): #遍历电影列表 交给core处理
+    for i in movie_list: #遍历电影列表 交给core处理
         count = count + 1
         percentage = str(count/int(count_all)*100)[:4]+'%'
         print('[!] - '+percentage+' ['+str(count)+'/'+count_all+'] -')
diff --git a/config.ini b/config.ini
index 75ddbf2..bb7ac69 100644
--- a/config.ini
+++ b/config.ini
@@ -23,5 +23,5 @@ media_warehouse=emby
 [escape]
 literals=\()
 
-[movie_location]
-path=
+[debug_mode]
+switch=0
\ No newline at end of file
diff --git a/core.py b/core.py
index dc47a65..9237aaa 100755
--- a/core.py
+++ b/core.py
@@ -16,6 +16,7 @@ import siro
 import avsox
 import javbus
 import javdb
+import fanza
 
 
 # =========website========
@@ -135,6 +136,14 @@ def getDataFromJSON(file_number): # 从JSON返回元数据
     # ==
     elif 'siro' in file_number or 'SIRO' in file_number or 'Siro' in file_number:
         json_data = json.loads(siro.main(file_number))
+    elif not '-' in file_number or '_' in file_number:
+        json_data = json.loads(fanza.main(file_number))
+        if getDataState(json_data) == 0: # 如果元数据获取失败,请求番号至其他网站抓取
+            json_data = json.loads(javbus.main(file_number))
+        if getDataState(json_data) == 0: # 如果元数据获取失败,请求番号至其他网站抓取
+            json_data = json.loads(avsox.main(file_number))
+        if getDataState(json_data) == 0: # 如果元数据获取失败,请求番号至其他网站抓取
+            json_data = json.loads(javdb.main(file_number))
     # ==
     else:
         json_data = json.loads(javbus.main(file_number))
@@ -145,7 +154,7 @@ def getDataFromJSON(file_number): # 从JSON返回元数据
 
     # ================================================网站规则添加结束================================================
 
-    title = str(json_data['title']).replace(' ', '')
+    title = json_data['title']
     studio = json_data['studio']
     year = json_data['year']
     outline = json_data['outline']
@@ -305,6 +314,18 @@ def imageDownload(): # 封面是否下载成功,否则移动到failed
         if DownloadFileWithFilename(cover, number + c_word + '.jpg', path) == 'failed':
             moveFailedFolder()
         DownloadFileWithFilename(cover, number + c_word + '.jpg', path)
+        if not os.path.getsize(path + '/' + number + c_word + '.jpg') == 0:
+            print('[+]Image Downloaded!', path + '/' + number + c_word + '.jpg')
+            return
+        i = 1
+        while i <= int(config['proxy']['retry']):
+            if os.path.getsize(path + '/' + number + c_word + '.jpg') == 0:
+                print('[!]Image Download Failed! Trying again. [' + config['proxy']['retry'] + '/3]')
+                DownloadFileWithFilename(cover, number + c_word + '.jpg', path)
+                i = i + 1
+                continue
+            else:
+                break
         if multi_part == 1:
             old_name = os.path.join(path, number + c_word + '.jpg')
             new_name = os.path.join(path, number + c_word + '.jpg')
@@ -316,11 +337,38 @@ def imageDownload(): # 封面是否下载成功,否则移动到failed
         if DownloadFileWithFilename(cover, 'fanart.jpg', path) == 'failed':
             moveFailedFolder()
         DownloadFileWithFilename(cover, 'fanart.jpg', path)
+        if not os.path.getsize(path + '/fanart.jpg') == 0:
+            print('[+]Image Downloaded!', path + '/fanart.jpg')
+            return
+        i = 1
+        while i <= int(config['proxy']['retry']):
+            if os.path.getsize(path + '/fanart.jpg') == 0:
+                print('[!]Image Download Failed! Trying again. [' + config['proxy']['retry'] + '/3]')
+                DownloadFileWithFilename(cover, 'fanart.jpg', path)
+                i = i + 1
+                continue
+            else:
+                break
+        if not os.path.getsize(path + '/' + number + c_word + '.jpg') == 0:
+            print('[!]Image Download Failed! Trying again.')
+            DownloadFileWithFilename(cover, number + c_word + '.jpg', path)
         print('[+]Image Downloaded!', path + '/fanart.jpg')
     elif option == 'kodi':
         if DownloadFileWithFilename(cover, number + c_word + '-fanart.jpg', path) == 'failed':
             moveFailedFolder()
         DownloadFileWithFilename(cover, number + c_word + '-fanart.jpg', path)
+        if not os.path.getsize(path + '/' + number + c_word + '-fanart.jpg') == 0:
+            print('[+]Image Downloaded!', path + '/' + number + c_word + '-fanart.jpg')
+            return
+        i = 1
+        while i <= int(config['proxy']['retry']):
+            if os.path.getsize(path + '/' + number + c_word + '-fanart.jpg') == 0:
+                print('[!]Image Download Failed! Trying again. [' + config['proxy']['retry'] + '/3]')
+                DownloadFileWithFilename(cover, number + c_word + '-fanart.jpg', path)
+                i = i + 1
+                continue
+            else:
+                break
         print('[+]Image Downloaded!', path + '/' + number + c_word + '-fanart.jpg')
 
 
@@ -330,6 +378,7 @@ def PrintFiles():
         os.makedirs(path)
     if option == 'plex':
         with open(path + "/" + number + c_word + ".nfo", "wt", encoding='UTF-8') as code:
+            print('<?xml version="1.0" encoding="UTF-8" ?>', file=code)
             print("<movie>", file=code)
             print("  <title>" + naming_rule + part + "</title>", file=code)
             print("  <set>", file=code)
@@ -377,6 +426,7 @@
         print("[+]Writeed! " + path + "/" + number + ".nfo")
     elif option == 'emby':
         with open(path + "/" + number + c_word + ".nfo", "wt", encoding='UTF-8') as code:
+            print('<?xml version="1.0" encoding="UTF-8" ?>', file=code)
             print("<movie>", file=code)
             print("  <title>" + naming_rule + part + "</title>", file=code)
             print("  <set>", file=code)
@@ -424,6 +474,7 @@
         print("[+]Writeed! " + path + "/" + number + c_word + ".nfo")
     elif option == 'kodi':
         with open(path + "/" + number + c_word + ".nfo", "wt", encoding='UTF-8') as code:
+            print('<?xml version="1.0" encoding="UTF-8" ?>', file=code)
             print("<movie>", file=code)
             print("  <title>" + naming_rule + part + "</title>", file=code)
             print("  <set>", file=code)
@@ -618,11 +669,11 @@ def debug_mode():
             print('[+] ---Debug info---')
             for i, v in json_data.items():
                 if i == 'outline':
-                    print('[+] -', i, ':', len(v), 'characters')
+                    print('[+] -', i, ' :', len(v), 'characters')
                     continue
                 if i == 'actor_photo' or i == 'year':
                     continue
-                print('[+] -', i+str(9-len(i)*'-'), ':', v)
+                print('[+] -',"%-11s" % i, ':', v)
             print('[+] ---Debug info---')
     except:
         aaa = ''
diff --git a/fanza.py b/fanza.py
new file mode 100644
index 0000000..34f21c8
--- /dev/null
+++ b/fanza.py
@@ -0,0 +1,108 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+import re
+from lxml import etree
+import json
+from ADC_function import *
+# import sys
+# import io
+# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
+
+def getTitle(a):
+    html = etree.fromstring(a, etree.HTMLParser())
+    result = html.xpath('//*[@id="title"]/text()')[0]
+    return result
+def getActor(a): #//*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
+    html = etree.fromstring(a, etree.HTMLParser())
+    result = str(html.xpath("//td[contains(text(),'出演者')]/following-sibling::td/span/a/text()")).strip(" ['']").replace("', '",',')
+    return result
+def getStudio(a):
+    html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+    try:
+        result1 = html.xpath("//td[contains(text(),'メーカー')]/following-sibling::td/a/text()")[0]
+    except:
+        result1 = html.xpath("//td[contains(text(),'メーカー')]/following-sibling::td/text()")[0]
+    return result1
+def getRuntime(a):
+    html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+    result1 = html.xpath("//td[contains(text(),'収録時間')]/following-sibling::td/text()")[0]
+    return re.search('\d+', str(result1)).group()
+def getLabel(a):
+    html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+    try:
+        result1 = html.xpath("//td[contains(text(),'シリーズ:')]/following-sibling::td/a/text()")[0]
+    except:
+        result1 = html.xpath("//td[contains(text(),'シリーズ:')]/following-sibling::td/text()")[0]
+    return result1
+def getNum(a):
+    html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+    try:
+        result1 = html.xpath("//td[contains(text(),'品番:')]/following-sibling::td/a/text()")[0]
+    except:
+        result1 = html.xpath("//td[contains(text(),'品番:')]/following-sibling::td/text()")[0]
+    return result1
+def getYear(getRelease):
+    try:
+        result = str(re.search('\d{4}',getRelease).group())
+        return result
+    except:
+        return getRelease
+def getRelease(a):
+    html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+    try:
+        result1 = html.xpath("//td[contains(text(),'商品発売日:')]/following-sibling::td/a/text()")[0].lstrip('\n')
+    except:
+        result1 = html.xpath("//td[contains(text(),'商品発売日:')]/following-sibling::td/text()")[0].lstrip('\n')
+    return result1
+def getTag(a):
+    html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+    try:
+        result1 = str(html.xpath("//td[contains(text(),'ジャンル:')]/following-sibling::td/a/text()")).strip(" ['']")
+    except:
+        result1 = str(html.xpath("//td[contains(text(),'ジャンル:')]/following-sibling::td/text()")).strip(" ['']")
+    return result1.replace("', '",",")
+def getCover(htmlcode,number):
+    html = etree.fromstring(htmlcode, etree.HTMLParser())
+    result = html.xpath('//*[@id="'+number+'"]/@href')[0]
+    return result
+def getDirector(a):
+    html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+    try:
+        result1 = html.xpath("//td[contains(text(),'監督:')]/following-sibling::td/a/text()")[0]
+    except:
+        result1 = html.xpath("//td[contains(text(),'監督:')]/following-sibling::td/text()")[0]
+    return result1
+def getOutline(htmlcode):
+    html = etree.fromstring(htmlcode, etree.HTMLParser())
+    result = str(html.xpath("//div[@class='mg-b20 lh4']/text()")[0]).replace('\n','')
+    return result
+def main(number):
+    htmlcode=get_html('https://www.dmm.co.jp/digital/videoa/-/detail/=/cid='+number)
+    url = 'https://www.dmm.co.jp/digital/videoa/-/detail/=/cid='+number
+    if '404 Not Found' in htmlcode:
+        htmlcode=get_html('https://www.dmm.co.jp/mono/dvd/-/detail/=/cid='+number)
+        url = 'https://www.dmm.co.jp/mono/dvd/-/detail/=/cid='+number
+    dic = {
+        'title': getTitle(htmlcode).strip(getActor(htmlcode)),
+        'studio': getStudio(htmlcode),
+        'outline': getOutline(htmlcode),
+        'runtime': getRuntime(htmlcode),
+        'director': getDirector(htmlcode),
+        'actor': getActor(htmlcode),
+        'release': getRelease(htmlcode),
+        'number': getNum(htmlcode),
+        'cover': getCover(htmlcode,number),
+        'imagecut': 1,
+        'tag': getTag(htmlcode),
+        'label':getLabel(htmlcode),
+        'year': getYear(getRelease(htmlcode)), # str(re.search('\d{4}',getRelease(a)).group()),
+        'actor_photo': '',
+        'website': url,
+        'source': 'siro.py',
+    }
+    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':')) # .encode('UTF-8')
+    return js
+
+# main('DV-1562')
+# input("[+][+]Press enter key exit, you can check the error messge before you exit.\n[+][+]按回车键结束,你可以在结束之前查看和错误信息。")
+#print(main('ssni00384'))
diff --git a/update_check.json b/update_check.json
index 1763776..68b6248 100644
--- a/update_check.json
+++ b/update_check.json
@@ -1,5 +1,5 @@
 {
-  "version": "2.2",
-  "version_show":"2.2",
+  "version": "2.3",
+  "version_show":"2.3",
   "download": "https://github.com/yoshiko2/AV_Data_Capture/releases"
 }