From 241ff00a0bdb3c8ec6fb2d1299e708aa1f3af796 Mon Sep 17 00:00:00 2001 From: mo_yy <54110819+moyy996@users.noreply.github.com> Date: Tue, 21 Jan 2020 01:20:40 +0800 Subject: [PATCH] 2.1 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 修复javdb抓取bug --- ADC_function.py | 3 ++- AV_Data_Capture.py | 31 ++++++++++++--------- README.md | 12 +++++++++ avsox.py | 8 +++--- config.ini | 5 ++-- core.py | 40 +++++++++++++++++++++------ fc2fans_club.py | 67 +++++++++++++++++++++++++++------------------- javdb.py | 55 ++++++++++++------------------------- siro.py | 2 ++ update_check.json | 4 +-- 10 files changed, 132 insertions(+), 95 deletions(-) diff --git a/ADC_function.py b/ADC_function.py index d807061..0158bbf 100755 --- a/ADC_function.py +++ b/ADC_function.py @@ -10,7 +10,8 @@ import sys from lxml import etree import sys import io -sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True) +# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True) +# sys.setdefaultencoding('utf-8') config_file='config.ini' config = ConfigParser() diff --git a/AV_Data_Capture.py b/AV_Data_Capture.py index 40f194e..36088e0 100755 --- a/AV_Data_Capture.py +++ b/AV_Data_Capture.py @@ -5,7 +5,6 @@ import glob import os import time import re -import sys from ADC_function import * import json import shutil @@ -15,7 +14,7 @@ os.chdir(os.getcwd()) # ============global var=========== -version='1.9' +version='2.1' config = ConfigParser() config.read(config_file, encoding='UTF-8') @@ -44,10 +43,10 @@ def UpdateCheck(): html = json.loads(str(html2)) if not version == html['version']: - print('[*] * New update ' + html['version'] + ' *') - print('[*] * Download *') + print('[*] * New update ' + html['version'] + ' *') + print('[*] ↓ Download ↓') print('[*] ' + html['download']) - print('[*]=====================================') + print('[*]======================================================') else: print('[+]Update Check disabled!') def movie_lists(): @@ -91,6 +90,8 @@ def getNumber(filepath): filepath = filepath.replace("_", "-") filepath.strip('22-sht.me').strip('-HD').strip('-hd') filename = str(re.sub("\[\d{4}-\d{1,2}-\d{1,2}\] - ", "", filepath)) # 去除文件名中时间 + if 'FC2' or 'fc2' in filename: + filename=filename.replace('-PPV','').replace('PPV-','') try: file_number = re.search('\w+-\d+', filename).group() except: # 提取类似mkbd-s120番号 @@ -126,9 +127,9 @@ def RunCore(): os.system('python3 core.py' + ' "' + i + '" --number "' + getNumber(i) + '"') # 从py文件启动(用于源码py) if __name__ =='__main__': - print('[*]===========AV Data Capture===========') - print('[*] Version '+version) - print('[*]=====================================') + print('[*]================== AV Data Capture ===================') + print('[*] Version '+version) + print('[*]======================================================') CreatFailedFolder() UpdateCheck() moveMovies() @@ -137,6 +138,8 @@ if __name__ =='__main__': count = 0 count_all = str(len(movie_lists())) print('[+]Find',str(len(movie_lists())),'movies') + if config['common']['soft_link'] == '1': + print('[!] --- Soft link mode is ENABLE! ----') for i in movie_lists(): #遍历电影列表 交给core处理 count = count + 1 percentage = str(count/int(count_all)*100)[:4]+'%' @@ -144,14 +147,18 @@ if __name__ =='__main__': try: print("[!]Making Data for [" + i + "], the number is [" + getNumber(i) + "]") RunCore() - print("[*]=====================================") + print("[*]======================================================") except: # 番号提取异常 print('[-]' + i + ' Cannot catch the number :') - print('[-]Move ' + i + ' to failed folder') - shutil.move(i, str(os.getcwd()) + '/' + 'failed/') + if config['common']['soft_link'] == '1': + print('[-]Link',i,'to failed folder') + os.symlink(i,str(os.getcwd()) + '/' + 'failed/') + else: + print('[-]Move ' + i + ' to failed folder') + shutil.move(i, str(os.getcwd()) + '/' + 'failed/') continue CEF(exclude_directory_1) CEF(exclude_directory_2) print("[+]All finished!!!") - input("[+][+]Press enter key exit, you can check the error messge before you exit.\n[+][+]按回车键结束,你可以在结束之前查看和错误信息。") \ No newline at end of file + input("[+][+]Press enter key exit, you can check the error messge before you exit.") \ No newline at end of file diff --git a/README.md b/README.md index 431b822..4c4ff45 100644 --- a/README.md +++ b/README.md @@ -107,6 +107,14 @@ pip install pillow 设置成功输出目录和失败输出目录 +--- +#### 软链接 +>[common] +>soft_link=0 + +1为开启软链接模式 +0为关闭 + --- ### 网络设置 >[proxy] @@ -225,6 +233,10 @@ pip install pillow ### 关于 ```Updata_check``` 和 ```JSON``` 相关的错误 跳转 [网络设置](#网络设置) +--- +### 关于字幕文件移动功能 +字幕文件前缀必须与影片文件前缀一致,才可以使用该功能 + --- ### 关于```FileNotFoundError: [WinError 3] 系统找不到指定的路径。: 'JAV_output''``` 在软件所在文件夹下新建 JAV_output 文件夹,可能是你没有把软件拉到和电影的同一目录 diff --git a/avsox.py b/avsox.py index ee4c79c..16ab19d 100644 --- a/avsox.py +++ b/avsox.py @@ -71,16 +71,16 @@ def getTag(a): # 获取演员 return d def main(number): - a = get_html('https://avsox.asia/cn/search/' + number) + a = get_html('https://avsox.host/cn/search/' + number) html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']") if result1 == '' or result1 == 'null' or result1 == 'None': - a = get_html('https://avsox.asia/cn/search/' + number.replace('-', '_')) + a = get_html('https://avsox.host/cn/search/' + number.replace('-', '_')) print(a) html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']") if result1 == '' or result1 == 'null' or result1 == 'None': - a = get_html('https://avsox.asia/cn/search/' + number.replace('_', '')) + a = get_html('https://avsox.host/cn/search/' + number.replace('_', '')) print(a) html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']") @@ -109,4 +109,4 @@ def main(number): js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8') return js -#print(main('041516_541')) \ No newline at end of file +#print(main('012717_472')) \ No newline at end of file diff --git a/config.ini b/config.ini index 5446524..6a735e8 100644 --- a/config.ini +++ b/config.ini @@ -2,9 +2,10 @@ main_mode=1 failed_output_folder=failed success_output_folder=JAV_output +soft_link=0 [proxy] -proxy=127.0.0.1:1081 +proxy= timeout=10 retry=3 @@ -17,7 +18,7 @@ update_check=1 [media] media_warehouse=emby -#emby or plex or kodi +#emby or plex or kodi ,emby=jellyfin [escape] literals=\() diff --git a/core.py b/core.py index e534335..dc47a65 100755 --- a/core.py +++ b/core.py @@ -128,7 +128,7 @@ def getDataFromJSON(file_number): # 从JSON返回元数据 json_data = json.loads(javdb.main(file_number)) # == elif 'fc2' in file_number or 'FC2' in file_number: - json_data = json.loads(fc2fans_club.main(file_number)) + json_data = json.loads(fc2fans_club.main(file_number.replace('fc2-','').replace('fc2_','').replace('FC2-','').replace('fc2_',''))) # == elif 'HEYZO' in number or 'heyzo' in number or 'Heyzo' in number: json_data = json.loads(avsox.main(file_number)) @@ -347,7 +347,7 @@ def PrintFiles(): for key, value in actor_photo.items(): print(" ", file=code) print(" " + key + "", file=code) - if not actor_photo == '': # or actor_photo == []: + if not value == '': # or actor_photo == []: print(" " + value + "", file=code) print(" ", file=code) except: @@ -394,7 +394,7 @@ def PrintFiles(): for key, value in actor_photo.items(): print(" ", file=code) print(" " + key + "", file=code) - if not actor_photo == '': # or actor_photo == []: + if not value == '': # or actor_photo == []: print(" " + value + "", file=code) print(" ", file=code) except: @@ -417,7 +417,7 @@ def PrintFiles(): if cn_sub == '1': print(" 中文字幕", file=code) print(" " + number + "", file=code) - print(" " + release + "", file=code) + print(" " + release + "", file=code) print(" " + cover + "", file=code) print(" " + "https://www.javbus.com/" + number + "", file=code) print("", file=code) @@ -440,7 +440,7 @@ def PrintFiles(): for key, value in actor_photo.items(): print(" ", file=code) print(" " + key + "", file=code) - if not actor_photo == '': # or actor_photo == []: + if not value == '': # or actor_photo == []: print(" " + value + "", file=code) print(" ", file=code) except: @@ -538,7 +538,19 @@ def pasteFileToFolder(filepath, path): # 文件路径,番号,后缀,要 global houzhui houzhui = str(re.search('[.](AVI|RMVB|WMV|MOV|MP4|MKV|FLV|TS|avi|rmvb|wmv|mov|mp4|mkv|flv|ts)$', filepath).group()) try: - os.rename(filepath, path + '/' + number + c_word + houzhui) + if config['common']['soft_link'] == '1': #如果soft_link=1 使用软链接 + os.symlink(filepath, path + '/' + number + c_word + houzhui) + else: + os.rename(filepath, path + '/' + number + c_word + houzhui) + if os.path.exists(os.getcwd()+'/'+number + c_word + '.srt'): #字幕移动 + os.rename(os.getcwd()+'/'+number + c_word + '.srt', path + '/' + number + c_word + '.srt') + print('[+]Sub moved!') + elif os.path.exists(os.getcwd()+'/'+number + c_word + '.ssa'): + os.rename(os.getcwd()+'/'+number + c_word + '.ssa', path + '/' + number + c_word + '.ssa') + print('[+]Sub moved!') + elif os.path.exists(os.getcwd()+'/'+number + c_word + '.sub'): + os.rename(os.getcwd()+'/'+number + c_word + '.sub', path + '/' + number + c_word + '.sub') + print('[+]Sub moved!') except FileExistsError: print('[-]File Exists! Please check your movie!') print('[-]move to the root folder of the program.') @@ -556,7 +568,19 @@ def pasteFileToFolder_mode2(filepath, path): # 文件路径,番号,后缀 houzhui = str(re.search('[.](AVI|RMVB|WMV|MOV|MP4|MKV|FLV|TS|avi|rmvb|wmv|mov|mp4|mkv|flv|ts)$', filepath).group()) path = success_folder + '/' + location_rule try: - os.rename(filepath, path + '/' + number + part + c_word + houzhui) + if config['common']['soft_link'] == '1': + os.symlink(filepath, path + '/' + number + part + c_word + houzhui) + else: + os.rename(filepath, path + '/' + number + part + c_word + houzhui) + if os.path.exists(number+'.srt'): #字幕移动 + os.rename(number + part + c_word + '.srt', path + '/' + number + part + c_word + '.srt') + print('[+]Sub moved!') + elif os.path.exists(number + part + c_word+'.ass'): + os.rename(number + part + c_word + '.ass', path + '/' + number + part + c_word + '.ass') + print('[+]Sub moved!') + elif os.path.exists(number + part + c_word+'.sub'): + os.rename(number + part + c_word + '.sub', path + '/' + number + part + c_word + '.sub') + print('[+]Sub moved!') print('[!]Success') except FileExistsError: print('[-]File Exists! Please check your movie!') @@ -598,7 +622,7 @@ def debug_mode(): continue if i == 'actor_photo' or i == 'year': continue - print('[+] -', i, ':', v) + print('[+] -', i+str(9-len(i)*'-'), ':', v) print('[+] ---Debug info---') except: aaa = '' diff --git a/fc2fans_club.py b/fc2fans_club.py index 747fa4c..72cb773 100755 --- a/fc2fans_club.py +++ b/fc2fans_club.py @@ -4,43 +4,50 @@ import json import ADC_function def getTitle(htmlcode): #获取厂商 + #print(htmlcode) html = etree.fromstring(htmlcode,etree.HTMLParser()) - result = str(html.xpath('//*[@id="container"]/div[1]/div/article/section[1]/h2/text()')).strip(" ['']") - return result + result = str(html.xpath('/html/body/div[2]/div/div[1]/h3/text()')).strip(" ['']") + result2 = str(re.sub('\D{2}2-\d+','',result)).replace(' ','',1) + #print(result2) + return result2 def getActor(htmlcode): try: html = etree.fromstring(htmlcode, etree.HTMLParser()) - result = str(html.xpath('//*[@id="container"]/div[1]/div/article/section[1]/div/div[2]/dl/dd[5]/a/text()')).strip(" ['']") + result = str(html.xpath('/html/body/div[2]/div/div[1]/h5[5]/a/text()')).strip(" ['']") return result except: return '' def getStudio(htmlcode): #获取厂商 - try: - html = etree.fromstring(htmlcode, etree.HTMLParser()) - result = str(html.xpath('//*[@id="container"]/div[1]/div/article/section[1]/div/div[2]/dl/dd[5]/a/text()')).strip(" ['']") - return result - except: - return '' + html = etree.fromstring(htmlcode,etree.HTMLParser()) + result = str(html.xpath('/html/body/div[2]/div/div[1]/h5[3]/a[1]/text()')).strip(" ['']") + return result def getNum(htmlcode): #获取番号 html = etree.fromstring(htmlcode, etree.HTMLParser()) result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[1]/span[2]/text()')).strip(" ['']") + #print(result) return result def getRelease(htmlcode2): # + #a=ADC_function.get_html('http://adult.contents.fc2.com/article_search.php?id='+str(number).lstrip("FC2-").lstrip("fc2-").lstrip("fc2_").lstrip("fc2-")+'&utm_source=aff_php&utm_medium=source_code&utm_campaign=from_aff_php') html=etree.fromstring(htmlcode2,etree.HTMLParser()) result = str(html.xpath('//*[@id="container"]/div[1]/div/article/section[1]/div/div[2]/dl/dd[4]/text()')).strip(" ['']") return result -def getCover(htmlcode2): #获取厂商 # +def getCover(htmlcode,number,htmlcode2): #获取厂商 # + #a = ADC_function.get_html('http://adult.contents.fc2.com/article_search.php?id=' + str(number).lstrip("FC2-").lstrip("fc2-").lstrip("fc2_").lstrip("fc2-") + '&utm_source=aff_php&utm_medium=source_code&utm_campaign=from_aff_php') html = etree.fromstring(htmlcode2, etree.HTMLParser()) result = str(html.xpath('//*[@id="container"]/div[1]/div/article/section[1]/div/div[1]/a/img/@src')).strip(" ['']") + if result == '': + html = etree.fromstring(htmlcode, etree.HTMLParser()) + result2 = str(html.xpath('//*[@id="slider"]/ul[1]/li[1]/img/@src')).strip(" ['']") + return 'https://fc2club.com' + result2 return 'http:' + result def getOutline(htmlcode2): #获取番号 # html = etree.fromstring(htmlcode2, etree.HTMLParser()) - result = str(html.xpath('//*[@id="container"]/div[1]/div/article/section[4]/p/text()')).strip(" ['']").replace("\\n",'',10000).replace("'",'',10000).replace(', ,','').strip(' ').replace('。,',',') + result = str(html.xpath('/html/body/div[1]/div[2]/div[2]/div[1]/div/article/section[4]/p/text()')).strip(" ['']").replace("\\n",'',10000).replace("'",'',10000).replace(', ,','').strip(' ').replace('。,',',') return result def getTag(htmlcode): #获取番号 html = etree.fromstring(htmlcode, etree.HTMLParser()) - result = html.xpath('//*[@id="container"]/div[1]/div/article/section[6]/ul/li/a/text()') - return result + result = str(html.xpath('/html/body/div[2]/div/div[1]/h5[4]/a/text()')) + return result.strip(" ['']").replace("'",'').replace(' ','') def getYear(release): try: result = re.search('\d{4}',release).group() @@ -49,25 +56,29 @@ def getYear(release): return '' def main(number): - number=number.replace('PPV','').replace('ppv','').strip('fc2_').strip('fc2-').strip('ppv-').strip('PPV-').strip('FC2_').strip('FC2-').strip('ppv-').strip('PPV-').replace('fc2ppv-','').replace('FC2PPV-','') - htmlcode2 = ADC_function.get_html('http://adult.contents.fc2.com/article_search.php?id='+str(number).lstrip("FC2-").lstrip("fc2-").lstrip("fc2_").lstrip("fc2-")+'') - #htmlcode = ADC_function.get_html('http://fc2fans.club/html/FC2-' + number + '.html') + htmlcode2 = ADC_function.get_html('http://adult.contents.fc2.com/article_search.php?id='+number+'&utm_source=aff_php&utm_medium=source_code&utm_campaign=from_aff_php') + htmlcode = ADC_function.get_html('https://fc2club.com//html/FC2-' + number + '.html') + actor = getActor(htmlcode) + if getActor(htmlcode) == '': + actor = 'FC2系列' dic = { - 'title': getTitle(htmlcode2), - 'studio': getStudio(htmlcode2), - 'year': getYear(getRelease(htmlcode2)), + 'title': getTitle(htmlcode), + 'studio': getStudio(htmlcode), + 'year': '',#str(re.search('\d{4}',getRelease(number)).group()), 'outline': getOutline(htmlcode2), - 'runtime': getYear(getRelease(htmlcode2)), - 'director': getStudio(htmlcode2), - 'actor': getStudio(htmlcode2), - 'release': getRelease(htmlcode2), + 'runtime': getYear(getRelease(htmlcode)), + 'director': getStudio(htmlcode), + 'actor': actor, + 'release': getRelease(number), 'number': 'FC2-'+number, - 'cover': getCover(htmlcode2), + 'cover': getCover(htmlcode,number,htmlcode2), 'imagecut': 0, - 'tag': getTag(htmlcode2), + 'tag': getTag(htmlcode), 'actor_photo':'', - 'website': 'http://adult.contents.fc2.com/article_search.php?id=' + number, - 'source': 'fc2fans_club.py', + 'website': 'https://fc2club.com//html/FC2-' + number + '.html', + 'source':'https://fc2club.com//html/FC2-' + number + '.html', } js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'),)#.encode('UTF-8') - return js \ No newline at end of file + return js + +#print(main('1051725')) diff --git a/javdb.py b/javdb.py index 34432f1..a85980c 100755 --- a/javdb.py +++ b/javdb.py @@ -4,7 +4,6 @@ import json from bs4 import BeautifulSoup from ADC_function import * - def getTitle(a): try: html = etree.fromstring(a, etree.HTMLParser()) @@ -12,98 +11,75 @@ def getTitle(a): return re.sub('.*\] ', '', result.replace('/', ',').replace('\\xa0', '').replace(' : ', '')) except: return re.sub('.*\] ', '', result.replace('/', ',').replace('\\xa0', '')) - - def getActor(a): # //*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text() html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() result1 = str(html.xpath('//strong[contains(text(),"演員")]/../following-sibling::span/text()')).strip(" ['']") result2 = str(html.xpath('//strong[contains(text(),"演員")]/../following-sibling::span/a/text()')).strip(" ['']") - return str(result1 + result2).strip('+').replace(",\\xa0", "").replace("'", "").replace(' ', '').replace(',,', - '').lstrip( - ',').replace(',', ', ') - - + return str(result1 + result2).strip('+').replace(",\\xa0", "").replace("'", "").replace(' ', '').replace(',,', '').lstrip(',').replace(',', ', ') +def getActorPhoto(actor): #//*[@id="star_qdt"]/li/a/img + a = actor.split(',') + d={} + for i in a: + p={i:''} + d.update(p) + return d def getStudio(a): html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() result1 = str(html.xpath('//strong[contains(text(),"片商")]/../following-sibling::span/text()')).strip(" ['']") result2 = str(html.xpath('//strong[contains(text(),"片商")]/../following-sibling::span/a/text()')).strip(" ['']") return str(result1 + result2).strip('+').replace("', '", '').replace('"', '') - - def getRuntime(a): html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() result1 = str(html.xpath('//strong[contains(text(),"時長")]/../following-sibling::span/text()')).strip(" ['']") result2 = str(html.xpath('//strong[contains(text(),"時長")]/../following-sibling::span/a/text()')).strip(" ['']") return str(result1 + result2).strip('+').rstrip('mi') - - def getLabel(a): html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() result1 = str(html.xpath('//strong[contains(text(),"系列")]/../following-sibling::span/text()')).strip(" ['']") result2 = str(html.xpath('//strong[contains(text(),"系列")]/../following-sibling::span/a/text()')).strip(" ['']") return str(result1 + result2).strip('+').replace("', '", '').replace('"', '') - - def getNum(a): html = etree.fromstring(a, etree.HTMLParser()) result1 = str(html.xpath('//strong[contains(text(),"番號")]/../following-sibling::span/text()')).strip(" ['']") result2 = str(html.xpath('//strong[contains(text(),"番號")]/../following-sibling::span/a/text()')).strip(" ['']") return str(result2 + result1).strip('+') - - def getYear(getRelease): try: result = str(re.search('\d{4}', getRelease).group()) return result except: return getRelease - - def getRelease(a): html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() result1 = str(html.xpath('//strong[contains(text(),"時間")]/../following-sibling::span/text()')).strip(" ['']") result2 = str(html.xpath('//strong[contains(text(),"時間")]/../following-sibling::span/a/text()')).strip(" ['']") return str(result1 + result2).strip('+') - - def getTag(a): html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() result1 = str(html.xpath('//strong[contains(text(),"类别")]/../following-sibling::span/text()')).strip(" ['']") result2 = str(html.xpath('//strong[contains(text(),"类别")]/../following-sibling::span/a/text()')).strip(" ['']") - return str(result1 + result2).strip('+').replace(",\\xa0", "").replace("'", "").replace(' ', '').replace(',,', - '').lstrip( - ',') - - + return str(result1 + result2).strip('+').replace(",\\xa0", "").replace("'", "").replace(' ', '').replace(',,', '').lstrip(',') def getCover_small(a): html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() result = 'http:' + html.xpath( '//div[@id=\'videos\']/div[@class=\'grid columns\']/div[@class=\'grid-item column\'][1]/a[' '@class=\'box\']/div[@class=\'item-image fix-scale-cover\']/img/@src')[0] return result - - def getCover(htmlcode): html = etree.fromstring(htmlcode, etree.HTMLParser()) result = str(html.xpath('/html/body/section/div/div[2]/div[1]/a/img/@src')).strip(" ['']") if result == '': - result = str(html.xpath('/html/body/section/div/div[3]/div[1]/a/img/@src')).strip(" ['']") + result = str(html.xpath('/html/body/section/div/div[4]/div[1]/a/img/@src')).strip(" ['']") return result - - def getDirector(a): html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() result1 = str(html.xpath('//strong[contains(text(),"導演")]/../following-sibling::span/text()')).strip(" ['']") result2 = str(html.xpath('//strong[contains(text(),"導演")]/../following-sibling::span/a/text()')).strip(" ['']") return str(result1 + result2).strip('+').replace("', '", '').replace('"', '') - - def getOutline(htmlcode): html = etree.fromstring(htmlcode, etree.HTMLParser()) result = str(html.xpath('//*[@id="introduction"]/dd/p[1]/text()')).strip(" ['']") return result - - def main(number): number = number.upper() try: @@ -115,7 +91,7 @@ def main(number): 'actor': getActor(b), 'title': getTitle(b).replace("\\n", '').replace(' ', '').replace(getActor(a), '').replace(getNum(a), '').replace( - '无码', '').replace('有码', '').lstrip(' ').replace(number, ''), + '无码', '').replace('有码', '').lstrip(' ').replace(number,''), 'studio': getStudio(b), 'outline': getOutline(b), 'runtime': getRuntime(b), @@ -128,7 +104,7 @@ def main(number): 'tag': getTag(b), 'label': getLabel(b), 'year': getYear(getRelease(b)), # str(re.search('\d{4}',getRelease(a)).group()), - 'actor_photo': '', + 'actor_photo': getActorPhoto(getActor(b)), 'website': 'https://javdb.com' + result1, 'source': 'javdb.py', } @@ -141,13 +117,14 @@ def main(number): a = get_html('https://javdb.com/search?q=' + number + '&f=all').replace(u'\xa0', u' ') html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() result1 = html.xpath('//*[@id="videos"]/div/div/a/@href')[0] + print(html.xpath('//*[@id="videos"]/div/div/a/@href')) b = get_html('https://javdb.com' + result1).replace(u'\xa0', u' ') dic = { 'actor': getActor(b), 'title': getTitle(b).replace("\\n", '').replace(' ', '').replace(getActor(a), '').replace( getNum(b), '').replace( - '无码', '').replace('有码', '').lstrip(' ').replace(number, ''), + '无码', '').replace('有码', '').lstrip(' ').replace(number,''), 'studio': getStudio(b), 'outline': getOutline(b), 'runtime': getRuntime(b), @@ -160,7 +137,7 @@ def main(number): 'tag': getTag(b), 'label': getLabel(b), 'year': getYear(getRelease(b)), # str(re.search('\d{4}',getRelease(a)).group()), - 'actor_photo': '', + 'actor_photo': getActorPhoto(getActor(b)), 'website': 'https://javdb.com' + result1, 'source': 'javdb.py', } @@ -170,4 +147,6 @@ def main(number): js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8') return js +# main('DV-1562') +# input("[+][+]Press enter key exit, you can check the error messge before you exit.\n[+][+]按回车键结束,你可以在结束之前查看和错误信息。") # print(get_html('https://javdb1.com/v/WwZ0Q')) diff --git a/siro.py b/siro.py index bc4444a..c16537f 100755 --- a/siro.py +++ b/siro.py @@ -101,3 +101,5 @@ def main(number2): } js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8') return js + +#print(main('SIRO-3607')) \ No newline at end of file diff --git a/update_check.json b/update_check.json index d1870d5..08acd50 100644 --- a/update_check.json +++ b/update_check.json @@ -1,5 +1,5 @@ { - "version": "1.9", - "version_show":"1.9", + "version": "2.1", + "version_show":"2.1", "download": "https://github.com/yoshiko2/AV_Data_Capture/releases" }