Update 2.5

This commit is contained in:
Yoshiko 2020-02-04 01:02:51 +08:00 committed by GitHub
parent 2c2867e3c6
commit a46391c6b2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 167 additions and 88 deletions

View File

@ -9,8 +9,8 @@ from ADC_function import *
from core import *
import json
import shutil
import fnmatch
from configparser import ConfigParser
import argparse
def UpdateCheck(version):
@ -26,6 +26,14 @@ def UpdateCheck(version):
else:
print('[+]Update Check disabled!')
def argparse_get_file():
    """Return the optional positional ``file`` command-line argument.

    Returns:
        The path given on the command line, or ``''`` when the argument
        was omitted.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("file", default='', nargs='?', help="Write the file path on here")
    # The positional defaults to '', so the parsed value is already the
    # right answer whether or not a path was supplied.
    return cli.parse_args().file
def movie_lists(escape_folder):
escape_folder = re.split('[,]', escape_folder)
@ -67,8 +75,11 @@ def CEF(path):
a = ''
def getNumber(filepath):
filepath = filepath.replace('.\\', '')
def getNumber(filepath,absolute_path = False):
if absolute_path == True:
filepath=filepath.replace('\\','/')
file_number = str(re.findall(r'(.+?)\.', str(re.search('([^<>/\\\\|:""\\*\\?]+)\\.\\w+$', filepath).group()))).strip("['']").replace('_', '-')
return file_number
if '-' in filepath or '_' in filepath: # 普通提取番号 主要处理包含减号-和_的番号
filepath = filepath.replace("_", "-")
filepath.strip('22-sht.me').strip('-HD').strip('-hd')
@ -79,14 +90,13 @@ def getNumber(filepath):
return file_number
else: # 提取不含减号-的番号FANZA CID
try:
return str(
re.findall(r'(.+?)\.', str(re.search('([^<>/\\\\|:""\\*\\?]+)\\.\\w+$', filepath).group()))).strip("['']").replace('_', '-')
return str(re.findall(r'(.+?)\.', str(re.search('([^<>/\\\\|:""\\*\\?]+)\\.\\w+$', filepath).group()))).strip("['']").replace('_', '-')
except:
return re.search(r'(.+?)\.', filepath)[0]
if __name__ == '__main__':
version = '2.4'
version = '2.5'
config_file = 'config.ini'
config = ConfigParser()
config.read(config_file, encoding='UTF-8')
@ -102,6 +112,19 @@ if __name__ == '__main__':
os.chdir(os.getcwd())
movie_list = movie_lists(escape_folder)
#========== 野鸡番号拖动 ==========
number_argparse=argparse_get_file()
if not number_argparse == '':
print("[!]Making Data for [" + number_argparse + "], the number is [" + getNumber(number_argparse,absolute_path = True) + "]")
core_main(number_argparse, getNumber(number_argparse,absolute_path = True))
print("[*]======================================================")
CEF(success_folder)
CEF(failed_folder)
print("[+]All finished!!!")
input("[+][+]Press enter key exit, you can check the error messge before you exit.")
os._exit(0)
# ========== 野鸡番号拖动 ==========
count = 0
count_all = str(len(movie_list))
print('[+]Find', count_all, 'movies')
@ -121,8 +144,13 @@ if __name__ == '__main__':
print('[-]Link', i, 'to failed folder')
os.symlink(i, str(os.getcwd()) + '/' + 'failed/')
else:
print('[-]Move ' + i + ' to failed folder')
shutil.move(i, str(os.getcwd()) + '/' + 'failed/')
try:
print('[-]Move ' + i + ' to failed folder')
shutil.move(i, str(os.getcwd()) + '/' + 'failed/')
except FileExistsError:
print('[!]File exists in failed!')
except:
print('[+]skip')
continue
CEF(success_folder)

View File

@ -3,6 +3,9 @@ from lxml import etree
import json
from bs4 import BeautifulSoup
from ADC_function import *
# import sys
# import io
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
def getActorPhoto(htmlcode): #//*[@id="star_qdt"]/li/a/img
soup = BeautifulSoup(htmlcode, 'lxml')

14
core.py
View File

@ -85,6 +85,11 @@ def getDataFromJSON(file_number, filepath, failed_folder): # 从JSON返回元
actor_list = str(json_data['actor']).strip("[ ]").replace("'", '').split(',') # 字符串转列表
release = json_data['release']
number = json_data['number']
studio = json_data['studio']
source = json_data['source']
runtime = json_data['runtime']
# The synopsis is stored under its own 'outline' key; reading 'runtime'
# here made the outline a copy of the runtime value.
outline = json_data['outline']
label = json_data['label']
try:
cover_small = json_data['cover_small']
except:
@ -93,9 +98,11 @@ def getDataFromJSON(file_number, filepath, failed_folder): # 从JSON返回元
tag = str(json_data['tag']).strip("[ ]").replace("'", '').replace(" ", '').split(',') # 字符串转列表 @
actor = str(actor_list).strip("[ ]").replace("'", '').replace(" ", '')
if title == '' or number == '':
print('[-]Movie Data not found!')
moveFailedFolder(filepath, failed_folder)
return
# if imagecut == '3':
# DownloadFileWithFilename()
@ -255,12 +262,14 @@ def DownloadFileWithFilename(url, filename, path, Config, filepath, failed_folde
print('[-]Image Download : Connect retry ' + str(i) + '/' + str(retry_count))
print('[-]Connect Failed! Please check your Proxy or Network!')
moveFailedFolder(filepath, failed_folder)
return
def imageDownload(option, cover, number, c_word, path, multi_part, Config, filepath, failed_folder): # 封面是否下载成功否则移动到failed
if option == 'emby':
if DownloadFileWithFilename(cover, number + c_word + '.jpg', path, Config, filepath, failed_folder) == 'failed':
moveFailedFolder(filepath, failed_folder)
return
DownloadFileWithFilename(cover, number + c_word + '.jpg', path, Config, filepath, failed_folder)
if not os.path.getsize(path + '/' + number + c_word + '.jpg') == 0:
print('[+]Image Downloaded!', path + '/' + number + c_word + '.jpg')
@ -284,6 +293,7 @@ def imageDownload(option, cover, number, c_word, path, multi_part, Config, filep
elif option == 'plex':
if DownloadFileWithFilename(cover, 'fanart.jpg', path, Config, filepath, failed_folder) == 'failed':
moveFailedFolder(filepath, failed_folder)
return
DownloadFileWithFilename(cover, 'fanart.jpg', path, Config, filepath, failed_folder)
if not os.path.getsize(path + '/fanart.jpg') == 0:
print('[+]Image Downloaded!', path + '/fanart.jpg')
@ -304,6 +314,7 @@ def imageDownload(option, cover, number, c_word, path, multi_part, Config, filep
elif option == 'kodi':
if DownloadFileWithFilename(cover, number + c_word + '-fanart.jpg', path, Config, filepath, failed_folder) == 'failed':
moveFailedFolder(filepath, failed_folder)
return
DownloadFileWithFilename(cover, number + c_word + '-fanart.jpg', path, Config, filepath, failed_folder)
if not os.path.getsize(path + '/' + number + c_word + '-fanart.jpg') == 0:
print('[+]Image Downloaded!', path + '/' + number + c_word + '-fanart.jpg')
@ -472,10 +483,12 @@ def PrintFiles(option, path, c_word, naming_rule, part, cn_sub, json_data, filep
print("[-]Write Failed!")
print(e)
moveFailedFolder(filepath, failed_folder)
return
except Exception as e1:
print(e1)
print("[-]Write Failed!")
moveFailedFolder(filepath, failed_folder)
return
def cutImage(option, imagecut, path, number, c_word):
@ -606,6 +619,7 @@ def get_part(filepath, failed_folder):
except:
print("[-]failed!Please rename the filename again!")
moveFailedFolder(filepath, failed_folder)
return
def debug_mode(json_data):

View File

@ -57,10 +57,10 @@ def getRelease(a):
def getTag(a):
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
try:
result1 = str(html.xpath("//td[contains(text(),'ジャンル:')]/following-sibling::td/a/text()")).strip(" ['']")
result1 = html.xpath("//td[contains(text(),'ジャンル:')]/following-sibling::td/a/text()")
except:
result1 = str(html.xpath("//td[contains(text(),'ジャンル:')]/following-sibling::td/text()")).strip(" ['']")
return result1.replace("', '",",")
result1 = html.xpath("//td[contains(text(),'ジャンル:')]/following-sibling::td/text()")
return result1
def getCover(htmlcode,number):
html = etree.fromstring(htmlcode, etree.HTMLParser())
result = html.xpath('//*[@id="'+number+'"]/@href')[0]
@ -110,4 +110,4 @@ def main(number):
# main('DV-1562')
# input("[+][+]Press enter key exit, you can check the error messge before you exit.\n[+][+]按回车键结束,你可以在结束之前查看和错误信息。")
#print(main('n0635'))
#print(main('ipx292'))

View File

@ -2,6 +2,9 @@ import re
from lxml import etree#need install
import json
import ADC_function
# import sys
# import io
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
def getTitle(htmlcode): #获取厂商
#print(htmlcode)
@ -55,8 +58,53 @@ def getYear(release):
except:
return ''
def getTitle_fc2com(htmlcode):
    """Return the title text of an FC2 article page.

    Args:
        htmlcode: HTML source of the article page.

    Returns:
        The first text node matched by the title XPath, or ``''`` when the
        XPath matches nothing.
    """
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    matches = html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/h3/text()')
    # An empty match list used to raise IndexError; report "no title"
    # with an empty string, like the sibling *_fc2com getters do.
    return matches[0] if matches else ''
def getActor_fc2com(htmlcode):
    """Return the actor text of an FC2 article page.

    Args:
        htmlcode: HTML source of the article page.

    Returns:
        The first text node matched by the XPath, or ``''`` when parsing
        fails or nothing matches.
    """
    # NOTE(review): this is the same XPath that getStudio_fc2com uses --
    # confirm that the seller link is really meant to double as the actor.
    try:
        html = etree.fromstring(htmlcode, etree.HTMLParser())
        return html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/ul/li[3]/a/text()')[0]
    except Exception:
        # Best-effort lookup: any parse or lookup failure means "unknown".
        # Exception (not a bare except) lets Ctrl-C and SystemExit through.
        return ''
def getStudio_fc2com(htmlcode):
    """Return the studio text of an FC2 article page.

    Args:
        htmlcode: HTML source of the article page.

    Returns:
        The matched text nodes rendered through ``str(list)`` with the
        surrounding brackets, quotes and spaces stripped; ``''`` when
        parsing fails or nothing matches.
    """
    try:
        html = etree.fromstring(htmlcode, etree.HTMLParser())
        nodes = html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/ul/li[3]/a/text()')
        return str(nodes).strip(" ['']")
    except Exception:
        # Best-effort lookup: any parse failure means "unknown".
        # Exception (not a bare except) lets Ctrl-C and SystemExit through.
        return ''
def getNum_fc2com(htmlcode):
    """Return the product number text of an FC2 article page.

    The matched text nodes are rendered through ``str(list)`` and the
    surrounding brackets, quotes and spaces are stripped, so an empty
    match yields ``''``.
    """
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    nodes = tree.xpath('/html/body/div[5]/div[1]/div[2]/p[1]/span[2]/text()')
    return str(nodes).strip(" ['']")
def getRelease_fc2com(htmlcode2):
    """Return the release text of an FC2 article page.

    Args:
        htmlcode2: HTML source of the article page.

    Returns:
        The matched text nodes rendered through ``str(list)`` with the
        surrounding brackets, quotes and spaces stripped (``''`` when
        nothing matches).
    """
    tree = etree.fromstring(htmlcode2, etree.HTMLParser())
    nodes = tree.xpath('//*[@id="container"]/div[1]/div/article/section[1]/div/div[2]/dl/dd[4]/text()')
    return str(nodes).strip(" ['']")
def getCover_fc2com(htmlcode2):
    """Return the cover image URL of an FC2 article page.

    Args:
        htmlcode2: HTML source of the article page.

    Returns:
        ``''`` when no image ``src`` is found; the ``src`` unchanged when
        it already starts with ``http://`` or ``https://``; otherwise the
        ``src`` prefixed with ``'http:'``.
    """
    html = etree.fromstring(htmlcode2, etree.HTMLParser())
    src = str(html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[1]/span/img/@src')).strip(" ['']")
    if not src:
        # Previously this produced the bare string 'http:', which looks
        # like a URL to callers but can never be fetched.
        return ''
    if src.startswith(('http://', 'https://')):
        # Already absolute: prefixing would yield 'http:https://...'.
        return src
    return 'http:' + src
def getOutline_fc2com(htmlcode2):
    """Return the outline text of an FC2 page as one cleaned-up string.

    The text nodes under ``/html/body/div`` are rendered through
    ``str(list)`` and then scrubbed of the list-repr artefacts (brackets,
    quotes, escaped newlines, empty elements).
    """
    tree = etree.fromstring(htmlcode2, etree.HTMLParser())
    text = str(tree.xpath('/html/body/div/text()')).strip(" ['']")
    # The cleanup steps are order-dependent; keep them in this sequence.
    text = text.replace("\\n", '', 10000)   # escaped newlines from the repr
    text = text.replace("'", '', 10000)     # quotes around each list element
    text = text.replace(', ,', '')          # gaps left by emptied elements
    text = text.strip(' ')
    return text.replace('。,', ',')
def getTag_fc2com(number):
    """Fetch the tag list for an FC2 article from the tag API.

    Args:
        number: Article id, concatenated into the API URL as a string.

    Returns:
        A list with every value captured by the ``"tag":"..."`` pattern in
        the response (an empty list when nothing matches).
    """
    url = 'http://adult.contents.fc2.com/api/v4/article/'+number+'/tag?'
    raw = ADC_function.get_html(url)
    # Re-encode and decode with 'unicode-escape' so escape sequences in
    # the response body become real characters before matching.
    decoded = str(bytes(raw, 'utf-8').decode('unicode-escape'))
    return re.findall('"tag":"(.*?)"', decoded)
def getYear_fc2com(release):
    """Return the first run of four digits found in ``release``.

    Args:
        release: Release-date text, e.g. ``'2020/02/04'``.

    Returns:
        The four-digit string, or ``''`` when ``release`` contains none or
        is not a string.
    """
    try:
        # Raw string: '\d' in a plain literal is an invalid escape sequence.
        match = re.search(r'\d{4}', release)
    except TypeError:
        # Non-string input (e.g. None) has no year to extract.
        return ''
    return match.group() if match else ''
def main(number):
htmlcode2 = ADC_function.get_html('http://adult.contents.fc2.com/article_search.php?id='+number+'&utm_source=aff_php&utm_medium=source_code&utm_campaign=from_aff_php')
htmlcode2 = ADC_function.get_html('https://adult.contents.fc2.com/article/'+number+'/')
htmlcode = ADC_function.get_html('https://fc2club.com//html/FC2-' + number + '.html')
actor = getActor(htmlcode)
if getActor(htmlcode) == '':
@ -65,12 +113,13 @@ def main(number):
'title': getTitle(htmlcode),
'studio': getStudio(htmlcode),
'year': '',#str(re.search('\d{4}',getRelease(number)).group()),
'outline': getOutline(htmlcode2),
'outline': '',#getOutline(htmlcode2),
'runtime': getYear(getRelease(htmlcode)),
'director': getStudio(htmlcode),
'actor': actor,
'release': getRelease(number),
'number': 'FC2-'+number,
'label': '',
'cover': getCover(htmlcode,number,htmlcode2),
'imagecut': 0,
'tag': getTag(htmlcode),
@ -78,7 +127,31 @@ def main(number):
'website': 'https://fc2club.com//html/FC2-' + number + '.html',
'source':'https://fc2club.com//html/FC2-' + number + '.html',
}
if dic['title'] == '':
    # The first source yielded no title: rebuild the record from the
    # adult.contents.fc2.com article page instead.
    # NOTE(review): the 'wei6H' cookie is sent as-is; its purpose is not
    # visible here -- confirm before changing it.
    htmlcode2 = ADC_function.get_html('https://adult.contents.fc2.com/article/' + number + '/',cookies={'wei6H':'1'})
    actor = getActor(htmlcode)
    if actor == '':
        actor = 'FC2系列'
    dic = {
        'title': getTitle_fc2com(htmlcode2),
        'studio': getStudio_fc2com(htmlcode2),
        'year': '',  # str(re.search('\d{4}',getRelease(number)).group()),
        'outline': getOutline_fc2com(htmlcode2),
        # NOTE(review): 'runtime' is filled with a year derived from
        # getRelease(); kept unchanged -- confirm the intended value.
        'runtime': getYear_fc2com(getRelease(htmlcode2)),
        'director': getStudio_fc2com(htmlcode2),
        'actor': actor,
        # getRelease_fc2com parses page HTML; it was being handed the bare
        # article number, so the release date always came back empty.
        'release': getRelease_fc2com(htmlcode2),
        'number': 'FC2-' + number,
        'cover': getCover_fc2com(htmlcode2),
        'imagecut': 0,
        'tag': getTag_fc2com(number),
        'label': '',
        'actor_photo': '',
        'website': 'http://adult.contents.fc2.com/article/' + number + '/',
        'source': 'http://adult.contents.fc2.com/article/' + number + '/',
    }
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'),)#.encode('UTF-8')
return js
#print(main('1051725'))
#print(main('1252953'))

100
javdb.py
View File

@ -8,12 +8,9 @@ from ADC_function import *
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
def getTitle(a):
try:
html = etree.fromstring(a, etree.HTMLParser())
result = str(html.xpath('/html/body/section/div/h2/strong/text()')).strip(" ['']")
return re.sub('.*\] ', '', result.replace('/', ',').replace('\\xa0', '').replace(' : ', ''))
except:
return re.sub('.*\] ', '', result.replace('/', ',').replace('\\xa0', ''))
html = etree.fromstring(a, etree.HTMLParser())
result = html.xpath("/html/body/section/div/h2/strong/text()")[0]
return result
def getActor(a): # //*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = str(html.xpath('//strong[contains(text(),"演員")]/../following-sibling::span/text()')).strip(" ['']")
@ -83,71 +80,32 @@ def getOutline(htmlcode):
return result
def main(number):
number = number.upper()
try:
a = get_html('https://javdb.com/search?q=' + number + '&f=all').replace(u'\xa0', u' ')
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = html.xpath('//*[@id="videos"]/div/div/a/@href')[0]
b = get_html('https://javdb.com' + result1).replace(u'\xa0', u' ')
dic = {
'actor': getActor(b),
'title': getTitle(b).replace("\\n", '').replace(' ', '').replace(getActor(a), '').replace(getNum(a),
'').replace(
'无码', '').replace('有码', '').lstrip(' ').replace(number,''),
'studio': getStudio(b),
'outline': getOutline(b),
'runtime': getRuntime(b),
'director': getDirector(b),
'release': getRelease(b),
'number': getNum(b),
'cover': getCover(b),
'cover_small': getCover_small(a),
'imagecut': 3,
'tag': getTag(b),
'label': getLabel(b),
'year': getYear(getRelease(b)), # str(re.search('\d{4}',getRelease(a)).group()),
'actor_photo': getActorPhoto(getActor(b)),
'website': 'https://javdb.com' + result1,
'source': 'javdb.py',
}
if getNum(b) != number: # 与搜索到的番号不匹配
dic['title'] = ''
dic['number'] = ''
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
return js
except:
a = get_html('https://javdb.com/search?q=' + number + '&f=all').replace(u'\xa0', u' ')
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = html.xpath('//*[@id="videos"]/div/div/a/@href')[0]
print(html.xpath('//*[@id="videos"]/div/div/a/@href'))
b = get_html('https://javdb.com' + result1).replace(u'\xa0', u' ')
dic = {
'actor': getActor(b),
'title': getTitle(b).replace("\\n", '').replace(' ', '').replace(getActor(a), '').replace(
getNum(b),
'').replace(
'无码', '').replace('有码', '').lstrip(' ').replace(number,''),
'studio': getStudio(b),
'outline': getOutline(b),
'runtime': getRuntime(b),
'director': getDirector(b),
'release': getRelease(b),
'number': getNum(b),
'cover': getCover(b),
'cover_small': getCover_small(a),
'imagecut': 3,
'tag': getTag(b),
'label': getLabel(b),
'year': getYear(getRelease(b)), # str(re.search('\d{4}',getRelease(a)).group()),
'actor_photo': getActorPhoto(getActor(b)),
'website': 'https://javdb3.com' + result1,
'source': 'javdb.py',
}
if getNum(b) != number: # 与搜索到的番号不匹配
dic['title'] = ''
dic['number'] = ''
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
return js
a = get_html('https://javdb.com/search?q=' + number + '&f=all')
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
result1 = html.xpath('//*[@id="videos"]/div/div/a/@href')[0]
b = get_html('https://javdb.com' + result1)
dic = {
'actor': getActor(b),
'title': getTitle(b),
'studio': getStudio(b),
'outline': getOutline(b),
'runtime': getRuntime(b),
'director': getDirector(b),
'release': getRelease(b),
'number': getNum(b),
'cover': getCover(b),
'cover_small': getCover_small(a),
'imagecut': 3,
'tag': getTag(b),
'label': getLabel(b),
'year': getYear(getRelease(b)), # str(re.search('\d{4}',getRelease(a)).group()),
'actor_photo': getActorPhoto(getActor(b)),
'website': 'https://javdb.com' + result1,
'source': 'javdb.py',
}
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
return js
# main('DV-1562')
# input("[+][+]Press enter key exit, you can check the error messge before you exit.\n[+][+]按回车键结束,你可以在结束之前查看和错误信息。")
# print(main('YMDD-178'))
#print(main('ipx-292'))

View File

@ -3,6 +3,9 @@ from lxml import etree
import json
from bs4 import BeautifulSoup
from ADC_function import *
# import sys
# import io
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
def getTitle(a):
try:

View File

@ -1,5 +1,5 @@
{
"version": "2.4",
"version_show":"2.4",
"version": "2.5",
"version_show":"2.5",
"download": "https://github.com/yoshiko2/AV_Data_Capture/releases"
}