Compare commits


No commits in common. "master" and "2.8" have entirely different histories.
master ... 2.8

53 changed files with 1860 additions and 3143 deletions

.gitattributes vendored (1 line changed)

@ -1 +0,0 @@
*.py text=auto eol=lf

.gitignore vendored (1 line changed)

@ -1,4 +1,3 @@
-*.DS_Store
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]

.idea/.gitignore generated vendored (2 lines changed)

@ -1,2 +0,0 @@
# Default ignored files
/workspace.xml

.idea/AV_Data_Capture.iml generated

@ -1,8 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
  <component name="NewModuleRootManager">
    <content url="file://$MODULE_DIR$" />
    <orderEntry type="jdk" jdkName="Python 3.8 (AV_Data_Capture)" jdkType="Python SDK" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
</module>

.idea/dictionaries/tanpengsccd.xml

@ -1,19 +0,0 @@
<component name="ProjectDictionaryState">
  <dictionary name="tanpengsccd">
    <words>
      <w>avsox</w>
      <w>emby</w>
      <w>fanart</w>
      <w>fanza</w>
      <w>javbus</w>
      <w>javdb</w>
      <w>jellyfin</w>
      <w>khtml</w>
      <w>kodi</w>
      <w>mgstage</w>
      <w>plex</w>
      <w>pondo</w>
      <w>rmvb</w>
    </words>
  </dictionary>
</component>

.idea/inspectionProfiles/profiles_settings.xml

@ -1,6 +0,0 @@
<component name="InspectionProjectProfileManager">
  <settings>
    <option name="USE_PROJECT_PROFILE" value="false" />
    <version value="1.0" />
  </settings>
</component>

.idea/misc.xml generated (7 lines changed)

@ -1,7 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="JavaScriptSettings">
    <option name="languageLevel" value="ES6" />
  </component>
  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.8 (AV_Data_Capture)" project-jdk-type="Python SDK" />
</project>

.idea/modules.xml generated (8 lines changed)

@ -1,8 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectModuleManager">
    <modules>
      <module fileurl="file://$PROJECT_DIR$/.idea/AV_Data_Capture.iml" filepath="$PROJECT_DIR$/.idea/AV_Data_Capture.iml" />
    </modules>
  </component>
</project>

.idea/other.xml generated (6 lines changed)

@ -1,6 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="PySciProjectComponent">
    <option name="PY_SCI_VIEW_SUGGESTED" value="true" />
  </component>
</project>

.idea/vcs.xml generated (6 lines changed)

@ -1,6 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="VcsDirectoryMappings">
    <mapping directory="$PROJECT_DIR$" vcs="Git" />
  </component>
</project>

ADC_function.py

@ -1,127 +1,121 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 import requests
 from configparser import ConfigParser
 import os
 import re
 import time
 import sys
 from lxml import etree
 import sys
 import io
-from ConfigApp import ConfigApp
-# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
-# sys.setdefaultencoding('utf-8')
-
-# config_file='config.ini'
-# config = ConfigParser()
-
-# if os.path.exists(config_file):
-#     try:
-#         config.read(config_file, encoding='UTF-8')
-#     except:
-#         print('[-]Config.ini read failed! Please use the official file!')
-# else:
-#     print('[+]config.ini: not found, creating...',end='')
-#     with open("config.ini", "wt", encoding='UTF-8') as code:
-#         print("[common]", file=code)
-#         print("main_mode = 1", file=code)
-#         print("failed_output_folder = failed", file=code)
-#         print("success_output_folder = JAV_output", file=code)
-#         print("", file=code)
-#         print("[proxy]",file=code)
-#         print("proxy=127.0.0.1:1081",file=code)
-#         print("timeout=10", file=code)
-#         print("retry=3", file=code)
-#         print("", file=code)
-#         print("[Name_Rule]", file=code)
-#         print("location_rule=actor+'/'+number",file=code)
-#         print("naming_rule=number+'-'+title",file=code)
-#         print("", file=code)
-#         print("[update]",file=code)
-#         print("update_check=1",file=code)
-#         print("", file=code)
-#         print("[media]", file=code)
-#         print("media_warehouse=emby", file=code)
-#         print("#emby plex kodi", file=code)
-#         print("", file=code)
-#         print("[escape]", file=code)
-#         print("literals=\\", file=code)
-#         print("", file=code)
-#         print("[movie_location]", file=code)
-#         print("path=", file=code)
-#         print("", file=code)
-#         print('.',end='')
-#         time.sleep(2)
-#         print('.')
-#     print('[+]config.ini: created!')
-#     print('[+]Please restart the program!')
-#     time.sleep(4)
-#     os._exit(0)
-# try:
-#     config.read(config_file, encoding='UTF-8')
-# except:
-#     print('[-]Config.ini read failed! Please use the official file!')
-
-config = ConfigApp()
-
-def get_network_settings():
-    try:
-        proxy = config.proxy
-        timeout = int(config.timeout)
-        retry_count = int(config.retry)
-        assert timeout > 0
-        assert retry_count > 0
-    except:
-        raise ValueError("[-]Proxy config error! Please check the config.")
-    return proxy, timeout, retry_count
-
-def getDataState(json_data):  # detect a failed metadata fetch
-    if json_data['title'] == '' or json_data['title'] == 'None' or json_data['title'] == 'null':
-        return 0
-    else:
-        return 1
-
-def ReadMediaWarehouse():
-    return config.media_server
-
-def UpdateCheckSwitch():
-    check=str(config.update_check)
-    if check == '1':
-        return '1'
-    elif check == '0':
-        return '0'
-    elif check == '':
-        return '0'
-
-def getXpathSingle(htmlcode,xpath):
-    html = etree.fromstring(htmlcode, etree.HTMLParser())
-    result1 = str(html.xpath(xpath)).strip(" ['']")
-    return result1
-
-def get_html(url,cookies = None):  # core web-request helper
-    proxy, timeout, retry_count = get_network_settings()
-    i = 0
-    print(url)
-    while i < retry_count:
-        try:
-            if not proxy == '':
-                proxies = {"http": proxy, "https": proxy}
-                headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3100.0 Safari/537.36'}
-                getweb = requests.get(str(url), headers=headers, timeout=timeout, proxies=proxies, cookies=cookies)
-                getweb.encoding = 'utf-8'
-                return getweb.text
-            else:
-                headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'}
-                getweb = requests.get(str(url), headers=headers, timeout=timeout, cookies=cookies)
-                getweb.encoding = 'utf-8'
-                return getweb.text
-        except Exception as e:
-            print(e)
-            i += 1
-            print('[-]Connect retry '+str(i)+'/'+str(retry_count))
-    print('[-]Connect Failed! Please check your Proxy or Network!')
+# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
+# sys.setdefaultencoding('utf-8')
+
+config_file='config.ini'
+config = ConfigParser()
+
+if os.path.exists(config_file):
+    try:
+        config.read(config_file, encoding='UTF-8')
+    except:
+        print('[-]Config.ini read failed! Please use the official file!')
+else:
+    print('[+]config.ini: not found, creating...',end='')
+    with open("config.ini", "wt", encoding='UTF-8') as code:
+        print("[common]", file=code)
+        print("main_mode = 1", file=code)
+        print("failed_output_folder = failed", file=code)
+        print("success_output_folder = JAV_output", file=code)
+        print("", file=code)
+        print("[proxy]",file=code)
+        print("proxy=127.0.0.1:1081",file=code)
+        print("timeout=10", file=code)
+        print("retry=3", file=code)
+        print("", file=code)
+        print("[Name_Rule]", file=code)
+        print("location_rule=actor+'/'+number",file=code)
+        print("naming_rule=number+'-'+title",file=code)
+        print("", file=code)
+        print("[update]",file=code)
+        print("update_check=1",file=code)
+        print("", file=code)
+        print("[media]", file=code)
+        print("media_warehouse=emby", file=code)
+        print("#emby plex kodi", file=code)
+        print("", file=code)
+        print("[escape]", file=code)
+        print("literals=\\", file=code)
+        print("", file=code)
+        print("[movie_location]", file=code)
+        print("path=", file=code)
+        print("", file=code)
+        print('.',end='')
+        time.sleep(2)
+        print('.')
+    print('[+]config.ini: created!')
+    print('[+]Please restart the program!')
+    time.sleep(4)
+    os._exit(0)
+try:
+    config.read(config_file, encoding='UTF-8')
+except:
+    print('[-]Config.ini read failed! Please use the official file!')
+
+def get_network_settings():
+    try:
+        proxy = config["proxy"]["proxy"]
+        timeout = int(config["proxy"]["timeout"])
+        retry_count = int(config["proxy"]["retry"])
+        assert timeout > 0
+        assert retry_count > 0
+    except:
+        raise ValueError("[-]Proxy config error! Please check the config.")
+    return proxy, timeout, retry_count
+
+def getDataState(json_data):  # detect a failed metadata fetch
+    if json_data['title'] == '' or json_data['title'] == 'None' or json_data['title'] == 'null':
+        return 0
+    else:
+        return 1
+
+def ReadMediaWarehouse():
+    return config['media']['media_warehouse']
+
+def UpdateCheckSwitch():
+    check=str(config['update']['update_check'])
+    if check == '1':
+        return '1'
+    elif check == '0':
+        return '0'
+    elif check == '':
+        return '0'
+
+def getXpathSingle(htmlcode,xpath):
+    html = etree.fromstring(htmlcode, etree.HTMLParser())
+    result1 = str(html.xpath(xpath)).strip(" ['']")
+    return result1
+
+def get_html(url,cookies = None):  # core web-request helper
+    proxy, timeout, retry_count = get_network_settings()
+    i = 0
+    while i < retry_count:
+        try:
+            if not proxy == '':
+                proxies = {"http": "http://" + proxy,"https": "https://" + proxy}
+                headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3100.0 Safari/537.36'}
+                getweb = requests.get(str(url), headers=headers, timeout=timeout,proxies=proxies, cookies=cookies)
+                getweb.encoding = 'utf-8'
+                return getweb.text
+            else:
+                headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'}
+                getweb = requests.get(str(url), headers=headers, timeout=timeout, cookies=cookies)
+                getweb.encoding = 'utf-8'
+                return getweb.text
+        except:
+            i += 1
+            print('[-]Connect retry '+str(i)+'/'+str(retry_count))
+    print('[-]Connect Failed! Please check your Proxy or Network!')
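
One functional difference worth noting in the hunk above: the 2.8 side builds the requests proxies mapping with an explicit scheme ("http://" + proxy), while the master side passes the bare host:port. A minimal sketch of the 2.8 pattern, using the placeholder proxy address from the generated config.ini:

import requests

proxy = "127.0.0.1:1081"  # placeholder host:port from the generated config.ini
# an explicit scheme makes the proxy type unambiguous; a bare host:port
# is treated by requests/urllib3 as an HTTP proxy
proxies = {"http": "http://" + proxy, "https": "https://" + proxy}
resp = requests.get("https://example.com", proxies=proxies, timeout=10)
print(resp.status_code)
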

AV_Data_Capture.py

@ -1,416 +1,162 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 import glob
 import os
 import time
-import fuckit
-from tenacity import retry, stop_after_delay, wait_fixed
-import json
-import shutil
-import itertools
-import argparse
-from pathlib import Path
-from core import *
-from ConfigApp import ConfigApp
-from PathNameProcessor import PathNameProcessor
-
-# TODO decouple and encapsulate CORE
-# TODO (to learn) unify the dependency-management tooling
-# TODO support one unified metadata layout (nfo, posters, ...) across media servers (emby, jellyfin, plex)
-# TODO subtitle collection: read all subtitles in the folders, extract their IDs and stage them in the TEMP cache
-
-config = ConfigApp()
-
-def safe_list_get(list_in, idx, default=None):
-    """
-    Safe indexed access to a list
-    :param list_in:
-    :param idx:
-    :param default:
-    :return:
-    """
-    try:
-        return list_in[idx]
-    except IndexError:
-        return default
-
-def UpdateCheck(version):
-    if UpdateCheckSwitch() == '1':
-        html2 = get_html('https://raw.githubusercontent.com/yoshiko2/AV_Data_Capture/master/update_check.json')
-        html = json.loads(str(html2))
-
-        if not version == html['version']:
-            print('[*] * New update ' + html['version'] + ' *')
-            print('[*] ↓ Download ↓')
-            print('[*] ' + html['download'])
-            print('[*]======================================================')
-    else:
-        print('[+]Update Check disabled!')
-
-def argparse_get_file():
-    parser = argparse.ArgumentParser()
-    parser.add_argument("file", default='', nargs='?', help="Write the file path on here")
-    args = parser.parse_args()
-    if args.file == '':
-        return ''
-    else:
-        return args.file
-
-def movie_lists(escape_folders):
-    escape_folders = re.split('[,]', escape_folders)
-    total = []
-
-    for root, dirs, files in os.walk(config.search_folder):
-        if root in escape_folders:
-            continue
-        for file in files:
-            if re.search(PathNameProcessor.pattern_of_file_name_suffixes, file, re.IGNORECASE):
-                path = os.path.join(root, file)
-                total.append(path)
-    return total
-
-# def CEF(path):
-#     try:
-#         files = os.listdir(path)  # list the children of this path
-#         for file in files:
-#             os.removedirs(path + '/' + file)  # delete this empty folder
-#             print('[+]Deleting empty folder', path + '/' + file)
-#     except:
-#         a = ''
-
-def get_numbers(paths):
-    """Extract the ID + episode for each path"""
-
-    def get_number(filepath, absolute_path=False):
-        """
-        Get the ID and the episode
-        :param filepath:
-        :param absolute_path:
-        :return:
-        """
-        name = filepath.upper()  # to upper case
-        if absolute_path:
-            name = name.replace('\\', '/')
-        # remove distracting fields
-        name = PathNameProcessor.remove_distractions(name)
-        # extract a possible trailing episode mark, plus the path with that mark stripped
-        suffix_episode, name = PathNameProcessor.extract_suffix_episode(name)
-        # extract a possible episode mark right behind the ID, plus the normalized ID
-        episode_behind_code, code_number = PathNameProcessor.extract_code(name)
-        # no ID found: fall back to an empty string
-        code_number = code_number if code_number else ''
-        # prefer the trailing episode, then the one behind the ID (rare), else empty
-        episode = suffix_episode if suffix_episode else episode_behind_code if episode_behind_code else ''
-
-        return code_number, episode
-
-    maps = {}
-    for path in paths:
-        number, episode = get_number(path)
-        maps[path] = (number, episode)
-
-    return maps
-
-def create_folder(paths):
-    for path_to_make in paths:
-        if path_to_make:
-            try:
-                os.makedirs(path_to_make)
-            except FileExistsError as e:
-                # name = f'{folder=}'.split('=')[0].split('.')[-1]
-                print(path_to_make + " already exists")
-                pass
-            except Exception as exception:
-                print('! Creating folder ' + path_to_make + ' failed: bad path or insufficient permissions')
-                raise exception
-        else:
-            raise Exception('! The folder path to create is empty, please check')
-
-if __name__ == '__main__':
-    version = '2.8.2'
-
-    print('[*]================== AV Data Capture ===================')
-    print('[*] Version ' + version)
-    print('[*]======================================================')
-
-    # UpdateCheck(version)
-
-    CreatFailedFolder(config.failed_folder)
-    os.chdir(os.getcwd())
-
-    # create the folders
-    create_folder([config.failed_folder, config.search_folder, config.temp_folder])
-
-    # in the temp folder, infos holds per-ID json metadata and pics holds the images
-    path_infos = config.temp_folder + '/infos'
-    path_pics = config.temp_folder + '/pics'
-    create_folder([path_infos, path_pics])
-
-    # walk the search folder and collect every movie path
-    movie_list = movie_lists(config.escape_folder)
-    # the following loads test data from a text file
-    # f = open('TestPathNFO.txt', 'r')
-    # f = open('TestPathSpecial.txt', 'r')
-    # movie_list = [line[:-1] for line in f.readlines()]
-    # f.close()
-
-    # build the list of [ID, episode, path] entries
-    code_ep_paths = [[codeEposode[0], codeEposode[1], path] for path, codeEposode in get_numbers(movie_list).items()]
-    [print(i) for i in code_ep_paths]
-
-    # group the movie list by ID (the key step) to find movies sharing the same ID
-    '''
-    pandas grouping would also work here: "https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html"
-    '''
-    # # show all columns when printing
-    # pd.set_option('display.max_columns', None)
-    # # show all rows
-    # pd.set_option('display.max_rows', None)
-    # # display width for values: 100 (default 50)
-    # pd.set_option('max_colwidth', 30)
-    # # build the frame
-    # df = pd.DataFrame(code_ep_paths, columns=('code', 'ep', 'path'))
-    # # group by ID
-    # groupedCode_code_ep_paths = df.groupby(['code'])
-    # # print(df.groupby(['code', 'ep']).describe().unstack())
-    # grouped_code_ep = df.groupby(['code', 'ep'])['path']
-    #
-
-    sorted_code_list = sorted(code_ep_paths, key=lambda code_ep_path: code_ep_path[0])
-    group_code_list = itertools.groupby(sorted_code_list, key=lambda code_ep_path: code_ep_path[0])
-
-    def group_code_list_to_dict(group_code_list):
-        data_dict = {}
-        for code, code_ep_path_group in group_code_list:
-            code_ep_path_list = list(code_ep_path_group)
-            eps_of_code = {}
-            group_ep_list = itertools.groupby(code_ep_path_list, key=lambda code_ep_path: code_ep_path[1])
-            for ep, group_ep_group in group_ep_list:
-                group_ep_list = list(group_ep_group)
-                eps_of_code[ep] = [code_ep_path[2] for code_ep_path in group_ep_list]
-            data_dict[code] = eps_of_code
-
-        return data_dict
-
-    def print_same_code_ep_path(data_dict_in):
-        for code_in in data_dict_in:
-            ep_path_list = data_dict_in[code_in]
-            if len(ep_path_list) > 1:
-                print('--' * 60)
-                print("|" + (code_in if code_in else 'unknown') + ":")
-                # group_ep_list = itertools.groupby(code_ep_path_list.items(), key=lambda code_ep_path: code_ep_path[0])
-                for ep in ep_path_list:
-                    path_list = ep_path_list[ep]
-                    print('--' * 12)
-                    ep = ep if ep else ' '
-                    if len(path_list) == 1:
-                        print('| episode:' + ep + ' file: ' + path_list[0])
-                    else:
-                        print('| episode:' + ep + ' files: ')
-                        for path in path_list:
-                            print('| ' + path)
-            else:
-                pass
-
-    # the grouped data: {code: {ep: [path]}}
-    data_dict_groupby_code_ep = group_code_list_to_dict(group_code_list)
-
-    print('--' * 100)
-    print("movies found: " + str(len(movie_list)))
-    print("distinct IDs: " + str(len(data_dict_groupby_code_ep)) + " (movies sharing an ID count once; all unrecognized IDs are grouped under 'unknown')")
-    print('Warning:!!!! the movies below share the same ID')
-    print('' + '--' * 80)
-    print_same_code_ep_path(data_dict_groupby_code_ep)
-    print('' + '--' * 80)
-    isContinue = input('Press any key to continue, N to exit\n')
-    if isContinue.strip(' ') == "N":
-        exit(1)
-
-    # ========== drag-in a file with an unrecognized ID ==========
-    # number_argparse = argparse_get_file()
-    # if not number_argparse == '':
-    #     print("[!]Making Data for [" + number_argparse + "], the number is [" + getNumber(number_argparse,
-    #                                                                                      absolute_path=True) + "]")
-    #     nfo = core_main(number_argparse, getNumber(number_argparse, absolute_path=True))
-    #     print("[*]======================================================")
-    #     CEF(config.success_folder)
-    #     CEF(config.failed_folder)
-    #     print("[+]All finished!!!")
-    #     input("[+][+]Press enter key exit, you can check the error message before you exit.")
-    #     os._exit(0)
-    # ========== drag-in a file with an unrecognized ID ==========
-
-    def download_code_infos(code_list, is_read_cache=True):
-        """
-        Walk the grouped IDs, scrape each ID's metadata and cache it
-        :param is_read_cache: whether to read cached data
-        :param code_list:
-        :return: {code: nfo}
-        """
-        count_all_grouped = len(code_list)
-        count = 0
-        code_info_dict = {}
-        for code in code_list:
-            count = count + 1
-            percentage = str(count / int(count_all_grouped) * 100)[:4] + '%'
-            print('[!] - ' + percentage + ' [' + str(count) + '/' + str(count_all_grouped) + '] -')
-            try:
-                print("[!]scraping data for [" + code + "]")
-                if code:
-                    # cache file for this ID
-                    file_path = path_infos + '/' + code + '.json'
-                    nfo = {}
-                    # read the cached info; scrape online if there is none
-                    path = Path(file_path)
-                    if is_read_cache and (path.exists() and path.is_file() and path.stat().st_size > 0):
-                        print('cached info found')
-                        with open(file_path) as fp:
-                            nfo = json.load(fp)
-                    else:
-                        # core feature: scrape the metadata dict online
-                        print('scraping online')
-                        nfo = core_main(code)
-                        print('writing', end='')
-
-                        # write the info into the cache folder; the device is sometimes busy and this fails, retrying is enough
-                        @retry(stop=stop_after_delay(3), wait=wait_fixed(2))
-                        def read_file():
-                            with open(file_path, 'w') as fp:
-                                json.dump(nfo, fp)
-
-                        read_file()
-                        print('done!')
-                    # store this ID's info in the dict
-                    code_info_dict[code] = nfo
-                    print("[*]======================================================")
-            except Exception as e:  # fetching this ID's info failed
-                code_info_dict[code] = ''
-                print("no info found for: " + code + ', reason: ' + str(e))
-                # if config.soft_link:
-                #     print('[-]Link', file_path_name, 'to failed folder')
-                #     os.symlink(file_path_name, config.failed_folder + '/')
-                # else:
-                #     try:
-                #         print('[-]Move ' + file_path_name + ' to failed folder:' + config.failed_folder)
-                #         shutil.move(file_path_name, config.failed_folder + '/')
-                #     except FileExistsError:
-                #         print('[!]File exists in failed!')
-                #     except:
-                #         print('[+]skip')
-                continue
-        return code_info_dict
-
-    print('----------------------------------')
-    code_infos = download_code_infos(data_dict_groupby_code_ep)
-    print("----IDs with no metadata found----")
-    print([print(code) for code in code_infos if code_infos[code] == ''])
-    print("-------------------------")
-
-    def download_images_of_nfos(code_info_dict):
-        """
-        Walk the metadata and download the poster images for each movie
-        :param code_info_dict:
-        :return: the IDs whose info contains no images
-        """
-        code_list_empty_image = []
-        for code in code_info_dict:
-            nfo = code_info_dict[code]
-            if len(nfo.keys()) == 0:
-                code_list_empty_image.append(code)
-                continue
-            code_pics_folder_to_save = path_pics + '/' + code
-            # 1 create this ID's folder
-            os.makedirs(code_pics_folder_to_save, exist_ok=True)
-            # download the thumbnail
-            if nfo['imagecut'] == 3:  # 3 means thumbnail
-                path = Path(code_pics_folder_to_save + '/' + 'thumb.png')
-                if path.exists() and path.is_file() and path.stat().st_size > 0:
-                    print(code + ': thumbnail already cached')
-                else:
-                    print(code + ': downloading thumbnail...')
-                    download_file(nfo['cover_small'], code_pics_folder_to_save, 'thumb.png')
-                    print(code + ': thumbnail done')
-            # download the poster
-            path = Path(code_pics_folder_to_save + '/' + 'poster.png')
-            if path.exists() and path.is_file() and path.stat().st_size > 0:
-                print(code + ': poster already cached')
-            else:
-                print(code + ': downloading poster...')
-                download_file(nfo['cover'], code_pics_folder_to_save, 'poster.png')
-                print(code + ': poster done')
-        return code_list_empty_image
-
-    code_list_empty = download_images_of_nfos(code_infos)
-    print("----IDs with no episode found----")
-    print([print(code) for code in code_list_empty])
-
-    print("------re-scraping the IDs with no episode------")
-    code_infos_of_no_ep = download_code_infos(code_list_empty, is_read_cache=False)
-    print("----IDs that still have no metadata----")
-    print([print(code) for code in code_infos_of_no_ep if code_infos_of_no_ep[code] == ''])
-    print("----------------------")
-
-    # start processing
-    # # 2 create the thumbnail poster
-    # if nfo['imagecut'] == 3:  # 3 means thumbnail
-    #     download_cover_file(nfo['cover_small'], code, code_pics_folder_to_save)
-    # # 3 create the image
-    # download_image(nfo['cover'], code, code_pics_folder_to_save)
-    # # 4 crop
-    # crop_image(nfo['imagecut'], code, code_pics_folder_to_save)
-    # # 5 background image
-    # copy_images_to_background_image(code, code_pics_folder_to_save)
-    # 6 create name.nfo (not needed; when needed, convert the json files in infos to nfo files)
-    # make_nfo_file(nfo, code, temp_path_to_save)
-
-    # handling movies with the same ID: append -CD[X] by episode; distinguish by video format and size
-    # TODO approach 1: scrape and add nfo, covers, content screenshots, etc.
-    # 6 create name.nfo (not needed; when needed, convert the json files in infos to nfo files)
-    make_nfo_file(nfo, code, temp_path_to_save)
-    # TODO approach 2: organize only: move movies and subtitles by rule into actor, studio, censored/uncensored, etc.
-    # if config.program_mode == '1':
-    #     if multi_part == 1:
-    #         number += part  # number gets the CD1 suffix appended here
-    #     smallCoverCheck(path, number, imagecut, json_data['cover_small'], c_word, option, filepath, config.failed_folder)  # check the small cover
-    #     imageDownload(option, json_data['cover'], number, c_word, path, multi_part, filepath, config.failed_folder)  # creatFoder returns the ID's path
-    #     cutImage(option, imagecut, path, number, c_word)  # crop the image
-    #     copyRenameJpgToBackdrop(option, path, number, c_word)
-    #     PrintFiles(option, path, c_word, json_data['naming_rule'], part, cn_sub, json_data, filepath, config.failed_folder, tag)  # write the .nfo file
-    #     pasteFileToFolder(filepath, path, number, c_word)  # move the file
-    # # ======================================================================= organize mode
-    # elif config.program_mode == '2':
-    #     pasteFileToFolder_mode2(filepath, path, multi_part, number, part, c_word)  # move the file
-
-    # CEF(config.success_folder)
-    # CEF(config.failed_folder)
-    print("[+]All finished!!!")
-    input("[+][+]Press enter key exit, you can check the error message before you exit.")
+import re
+from ADC_function import *
+from core import *
+import json
+import shutil
+from configparser import ConfigParser
+import argparse
+
+def UpdateCheck(version):
+    if UpdateCheckSwitch() == '1':
+        html2 = get_html('https://raw.githubusercontent.com/yoshiko2/AV_Data_Capture/master/update_check.json')
+        html = json.loads(str(html2))
+
+        if not version == html['version']:
+            print('[*] * New update ' + html['version'] + ' *')
+            print('[*] ↓ Download ↓')
+            print('[*] ' + html['download'])
+            print('[*]======================================================')
+    else:
+        print('[+]Update Check disabled!')
+
+def argparse_get_file():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("file", default='',nargs='?', help="Write the file path on here")
+    args = parser.parse_args()
+    if args.file == '':
+        return ''
+    else:
+        return args.file
+
+def movie_lists(escape_folder):
+    escape_folder = re.split('[,]', escape_folder)
+    total = []
+    file_type = ['.mp4', '.avi', '.rmvb', '.wmv', '.mov', '.mkv', '.flv', '.ts', '.webm', '.MP4', '.AVI', '.RMVB', '.WMV', '.MOV', '.MKV', '.FLV', '.TS', '.WEBM', ]
+    file_root = os.getcwd()
+    for root, dirs, files in os.walk(file_root):
+        flag_escape = 0
+        for folder in escape_folder:
+            if folder in root:
+                flag_escape = 1
+                break
+        if flag_escape == 1:
+            continue
+        for f in files:
+            if os.path.splitext(f)[1] in file_type:
+                path = os.path.join(root, f)
+                path = path.replace(file_root, '.')
+                total.append(path)
+    return total
+
+def CreatFailedFolder(failed_folder):
+    if not os.path.exists(failed_folder + '/'):  # create the 'failed' folder
+        try:
+            os.makedirs(failed_folder + '/')
+        except:
+            print("[-]failed!can not be make folder 'failed'\n[-](Please run as Administrator)")
+            os._exit(0)
+
+def CEF(path):
+    try:
+        files = os.listdir(path)  # list the children of this path
+        for file in files:
+            os.removedirs(path + '/' + file)  # delete this empty folder
+            print('[+]Deleting empty folder', path + '/' + file)
+    except:
+        a = ''
+
+def getNumber(filepath,absolute_path = False):
+    if absolute_path == True:
+        filepath=filepath.replace('\\','/')
+        file_number = str(re.findall(r'(.+?)\.', str(re.search('([^<>/\\\\|:""\\*\\?]+)\\.\\w+$', filepath).group()))).strip("['']").replace('_', '-')
+        return file_number
+    if '-' in filepath or '_' in filepath:  # normal ID extraction, mainly for IDs containing '-' or '_'
+        filepath = filepath.replace("_", "-")
+        filepath.strip('22-sht.me').strip('-HD').strip('-hd')
+        filename = str(re.sub("\[\d{4}-\d{1,2}-\d{1,2}\] - ", "", filepath))  # strip dates from the file name
+        if 'FC2' in filename.upper():
+            filename = filename.replace('-PPV', '').replace('PPV-', '').replace('FC2PPV-','FC2-').replace('FC2PPV_','FC2-')
+        file_number = re.search(r'\w+-\w+', filename, re.A).group()
+        return file_number
+    else:  # extract IDs without a dash (FANZA CID)
+        try:
+            return str(re.findall(r'(.+?)\.', str(re.search('([^<>/\\\\|:""\\*\\?]+)\\.\\w+$', filepath).group()))).strip("['']").replace('_', '-')
+        except:
+            return re.search(r'(.+?)\.', filepath)[0]
+
+if __name__ == '__main__':
+    version = '2.8'
+    config_file = 'config.ini'
+    config = ConfigParser()
+    config.read(config_file, encoding='UTF-8')
+    success_folder = config['common']['success_output_folder']
+    failed_folder = config['common']['failed_output_folder']  # failed-output folder
+    escape_folder = config['escape']['folders']  # folders excluded from multi-level scraping
+    print('[*]================== AV Data Capture ===================')
+    print('[*] Version ' + version)
+    print('[*]======================================================')
+
+    UpdateCheck(version)
+    CreatFailedFolder(failed_folder)
+    os.chdir(os.getcwd())
+    movie_list = movie_lists(escape_folder)
+
+    # ========== drag-in a file with an unrecognized ID ==========
+    number_argparse=argparse_get_file()
+    if not number_argparse == '':
+        print("[!]Making Data for [" + number_argparse + "], the number is [" + getNumber(number_argparse,absolute_path = True) + "]")
+        core_main(number_argparse, getNumber(number_argparse,absolute_path = True))
+        print("[*]======================================================")
+        CEF(success_folder)
+        CEF(failed_folder)
+        print("[+]All finished!!!")
+        input("[+][+]Press enter key exit, you can check the error message before you exit.")
+        os._exit(0)
+    # ========== drag-in a file with an unrecognized ID ==========
+
+    count = 0
+    count_all = str(len(movie_list))
+    print('[+]Find', count_all, 'movies')
+    if config['common']['soft_link'] == '1':
+        print('[!] --- Soft link mode is ENABLED! ----')
+    for i in movie_list:  # walk the movie list and hand each file to core
+        count = count + 1
+        percentage = str(count / int(count_all) * 100)[:4] + '%'
+        print('[!] - ' + percentage + ' [' + str(count) + '/' + count_all + '] -')
+        # print("[!]Making Data for [" + i + "], the number is [" + getNumber(i) + "]")
+        # core_main(i, getNumber(i))
+        # print("[*]======================================================")
+        try:
+            print("[!]Making Data for [" + i + "], the number is [" + getNumber(i) + "]")
+            core_main(i, getNumber(i))
+            print("[*]======================================================")
+        except:  # ID extraction failed
+            print('[-]' + i + ' Cannot catch the number :')
+            if config['common']['soft_link'] == '1':
+                print('[-]Link', i, 'to failed folder')
+                os.symlink(i, str(os.getcwd()) + '/' + failed_folder + '/')
+            else:
+                try:
+                    print('[-]Move ' + i + ' to failed folder')
+                    shutil.move(i, str(os.getcwd()) + '/' + failed_folder + '/')
+                except FileExistsError:
+                    print('[!]File exists in failed!')
+                except:
+                    print('[+]skip')
+            continue
+    CEF(success_folder)
+    CEF(failed_folder)
+    print("[+]All finished!!!")
+    input("[+][+]Press enter key exit, you can check the error message before you exit.")

ConfigApp.py

@ -1,28 +0,0 @@
from configparser import ConfigParser

from MediaServer import MediaServer


class ConfigApp:

    def __init__(self):
        config_file = 'config.ini'
        config = ConfigParser()
        config.read(config_file, encoding='UTF-8')

        self.success_folder = config['common']['success_output_folder']
        self.failed_folder = config['common']['failed_output_folder']  # failed-output folder
        self.escape_folder = config['escape']['folders']  # folders excluded from multi-level scraping
        self.search_folder = config['common']['search_folder']  # search path
        self.temp_folder = config['common']['temp_folder']  # temp resources path
        self.soft_link = (config['common']['soft_link'] == '1')  # configparser returns strings, so compare against '1'
        # self.escape_literals = (config['escape']['literals'] == 1)
        self.naming_rule = config['Name_Rule']['naming_rule']
        self.location_rule = config['Name_Rule']['location_rule']
        self.proxy = config['proxy']['proxy']
        self.timeout = float(config['proxy']['timeout'])
        self.retry = int(config['proxy']['retry'])
        self.media_server = MediaServer[config['media']['media_warehouse']]
        self.update_check = config['update']['update_check']
        self.debug_mode = config['debug_mode']['switch']
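
A minimal usage sketch, assuming a complete config.ini (with the [common], [proxy], [Name_Rule], [escape], [media], [update] and [debug_mode] sections read above) sits in the working directory:

config = ConfigApp()
print(config.proxy, config.timeout, config.retry)
print(config.media_server)  # a MediaServer member, looked up by name
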

LICENSE (0 lines changed; mode Executable file → Normal file)


@ -1,19 +0,0 @@
import pandas as pd
import numpy as np

df = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar',
                         'foo', 'bar', 'foo', 'foo'],
                   'B': ['one', 'one', 'two', 'three',
                         'two', 'two', 'one', 'three'],
                   'C': np.random.randn(8),
                   'D': np.random.randn(8)})
print(df)

groupedA = df.groupby('A').describe()
groupedAB = df.groupby(['A', 'B'])['C']
print('---' * 18)
for a, b in groupedAB:
    print('--' * 18)
    print(a)
    print('-' * 18)
    print(b)


@ -1,38 +0,0 @@
import pandas as pd
import numpy as np

'''
pandas, one of the "three musketeers" of Python data processing
https://pandas.pydata.org/pandas-docs/stable/user_guide
https://www.pypandas.cn/docs/getting_started/10min.html
'''
dates = pd.date_range('20130101', periods=6)
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))
print(dates)
print(df)

df2 = pd.DataFrame({'A': 1.,
                    'B': pd.Timestamp('20130102'),
                    'C': pd.Series(1, index=list(range(4)), dtype='float32'),
                    'D': np.array([3] * 4, dtype='int32'),
                    'E': pd.Categorical(["test", "train", "test", "train"]),
                    'F': 'foo'})
print(df2)
print(df2.dtypes)

print(df.head())
print(df.tail(5))
print(df.index)
print(df.columns)
df.describe()  # summary statistics
df.T  # transpose index and columns
df.sort_index(axis=1, ascending=False)  # sort; axis=1 sorts the columns, axis=0 the index
df.sort_values(by='B')  # sort by values, here by column B

# select a column
df.A
df['A']
# slice rows
df['20130102':'20130104']
df[0:3]

MediaServer.py

@ -1,28 +0,0 @@
from enum import Enum, auto


class MediaServer(Enum):
    EMBY = auto()
    PLEX = auto()
    KODI = auto()

    # media = EMBY
    #
    # def __init__(self, arg):
    #     self = [e for e in MediaServer if arg.upper() == self.name]

    def poster_name(self, name):
        if self == MediaServer.EMBY:  # saved as [name].png
            return name + '.png'
        elif self == MediaServer.KODI:  # saved as [name]-poster.jpg
            return name + '-poster.jpg'
        elif self == MediaServer.PLEX:  # saved as poster.jpg
            return 'poster.jpg'

    def image_name(self, name):
        if self == MediaServer.EMBY:  # [name].jpg
            return name + '.jpg'
        elif self == MediaServer.KODI:  # [name]-fanart.jpg
            return name + '-fanart.jpg'
        elif self == MediaServer.PLEX:  # fanart.jpg
            return 'fanart.jpg'
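
A short usage sketch. Note that MediaServer[...] (as ConfigApp uses it) looks a member up by name, and Enum name lookup is case-sensitive, so a lowercase config value such as emby must be normalized first:

server = MediaServer['EMBY']          # name lookup, must match the member name exactly
print(server.poster_name('ABC-123'))  # ABC-123.png
print(server.image_name('ABC-123'))   # ABC-123.jpg
server = MediaServer['emby'.upper()]  # a raw lowercase value would raise KeyError
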

Metadata.py

@ -1,3 +0,0 @@
from addict import Dict
# class Metadata:

PathNameProcessor.py

@ -1,115 +0,0 @@
import re

import fuckit


class PathNameProcessor:
    # class variables
    pattern_of_file_name_suffixes = r'.(mov|mp4|avi|rmvb|wmv|mov|mkv|flv|ts|m2ts)$'

    # def __init__(self):

    @staticmethod
    def remove_distractions(origin_name):
        """Remove distractions"""
        # remove the file-type suffix
        origin_name = re.sub(PathNameProcessor.pattern_of_file_name_suffixes, '', origin_name, 0, re.IGNORECASE)
        # handle IDs containing '-' or '_', e.g. '/-070409_621'
        origin_name = re.sub(r'[-_~*# ]', "-", origin_name, 0)
        origin_name = re.sub(r'(Carib)(bean)?', '-', origin_name, 0, re.IGNORECASE)
        origin_name = re.sub(r'(1pondo)', '-', origin_name, 0, re.IGNORECASE)
        origin_name = re.sub(r'(tokyo)[-. ]?(hot)', '-', origin_name, 0, re.IGNORECASE)
        origin_name = re.sub(r'Uncensored', '-', origin_name, 0, re.IGNORECASE)
        origin_name = re.sub(r'JAV', '-', origin_name, 0, re.IGNORECASE)
        # remove distracting fields
        origin_name = origin_name.replace('22-sht.me', '-')

        # strip dates (years 1970-2099, month, day) from the file name
        pattern_of_date = r'(?:-)(19[789]\d|20\d{2})(-?(0\d|1[012])-?(0[1-9]|[12]\d|3[01])?)?[-.]'
        # resolution markers that start with a letter
        pattern_of_resolution_alphas = r'(?<![a-zA-Z])(SD|((F|U)|(Full|Ultra)[-_*. ~]?)?HD|BD|(blu[-_*. ~]?ray)|[hx]264|[hx]265|HEVC)'
        # resolution markers that start with a digit
        pattern_of_resolution_numbers = r'(?<!\d)(4K|(1080[ip])|(720p)|(480p))'
        origin_name = re.sub(pattern_of_resolution_alphas, "-", origin_name, 0, re.IGNORECASE)
        origin_name = re.sub(pattern_of_resolution_numbers, "-", origin_name, 0, re.IGNORECASE)
        origin_name = re.sub(pattern_of_date, "-", origin_name)
        if 'FC2' in origin_name.upper():
            origin_name = origin_name.replace('-PPV', '').replace('PPV-', '').replace('FC2PPV-', 'FC2-').replace(
                'FC2PPV_', 'FC2-')
        # collapse runs of repeated separator characters
        origin_name = re.sub(r"([-.])(\1+)", r"\1", origin_name)
        # strip trailing separators so the episode mark is easier to recognize
        origin_name = re.sub(r'[-.]+$', "", origin_name)
        return origin_name

    @staticmethod
    def extract_suffix_episode(origin_name):
        """Extract the trailing episode mark (one character only): 123ABC, part1, ipz.A, CD1, NOP019B.HD.wmv"""
        episode = None
        with fuckit:
            # zero-width assertion for a trailing digit as the episode number (123)
            pattern_episodes_number = r'(?<!\d)\d$'
            episode = re.findall(pattern_episodes_number, origin_name)[-1]
            origin_name = re.sub(pattern_episodes_number, "", origin_name)
        with fuckit:
            # zero-width assertion for a trailing letter as the episode mark (abc)
            pattern_episodes_alpha = r'(?<![a-zA-Z])[a-zA-Z]$'
            episode = re.findall(pattern_episodes_alpha, origin_name)[-1]
            origin_name = re.sub(pattern_episodes_alpha, "", origin_name)
        return episode, origin_name

    @staticmethod
    def extract_code(origin_name):
        """
        Extract the episode and the normalized ID
        """
        name = None
        episode = None
        with fuckit:
            # find the ID, dashed or not: 1. digits+digits 2. letters+digits
            name = re.findall(r'(?:\d{2,}-\d{2,})|(?:[A-Z]+-?[A-Z]*\d{2,})', origin_name)[-1]
            episode = PathNameProcessor.extract_episode_behind_code(origin_name, name)
            # add a '-' to IDs that lack one
            if not ('-' in name):
                # an ID without a dash: try to split it and insert one
                # non-greedy match of letters followed by at least 2 consecutive digits: ipz221.part2, mide072hhb, n1180
                with fuckit:
                    name = re.findall(r'[a-zA-Z]+\d{2,}', name)[-1]
                # e.g. MCDV-47 and mcdv-047 are two different movies, but SIVR-00008 and SIVR-008 are the same one; heyzo is the exception, its IDs use four digits
                if "heyzo" not in name.lower():
                    name = re.sub(r'([a-zA-Z]{2,})(?:0*?)(\d{2,})', r'\1-\2', name)
            # match dashed IDs [letters-[letters]digits], digits always more than 2; take the last match
            with fuckit:
                # MKBD_S03-MaRieS
                name = re.findall(r'[a-zA-Z|\d]+-[a-zA-Z|\d]*\d{2,}', name)[-1]
                # 107NTTR-037 -> NTTR-037, SIVR-00008 -> SIVR-008; heyzo excepted
                if "heyzo" not in name.lower():
                    searched = re.search(r'([a-zA-Z]{2,})-(?:0*)(\d{3,})', name)
                    if searched:
                        name = '-'.join(searched.groups())
        return episode, name

    @staticmethod
    def extract_episode_behind_code(origin_name, code):
        episode = None
        with fuckit:
            # zero-width assertion for a single letter or digit right behind the ID
            result_dict = re.search(rf'(?<={code})-?((?P<alpha>([A-Z](?![A-Z])))|(?P<num>\d(?!\d)))', origin_name,
                                    re.I).groupdict()
            episode = result_dict['alpha'] or result_dict['num']
        return episode


def safe_list_get(list_in, idx, default):
    try:
        return list_in[idx]
    except IndexError:
        return default
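
A rough end-to-end sketch of how get_numbers in AV_Data_Capture.py drives this class (the file name is a made-up sample, and the annotated results are approximate):

name = 'ABP-013HDA.WMV'
name = PathNameProcessor.remove_distractions(name)         # roughly 'ABP-013-A'
ep, name = PathNameProcessor.extract_suffix_episode(name)  # roughly ('A', 'ABP-013-')
ep2, code = PathNameProcessor.extract_code(name)           # normalized ID 'ABP-013'
print(code, ep or ep2)
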

Pipfile (19 lines changed)

@ -1,19 +0,0 @@
[[source]]
name = "pypi"
url = "https://pypi.org/simple"
verify_ssl = true
[dev-packages]
[packages]
bs4 = "*"
tenacity = "*"
fuckit = "*"
requests = "*"
image = "*"
lazyxml = {editable = true,git = "https://github.com/waynedyck/lazyxml.git",ref = "python-3-conversion_wd1"}
lxml = "*"
pyquery = "*"
[requires]
python_version = "3.8"

Pipfile.lock generated (246 lines changed)

@ -1,246 +0,0 @@
{
"_meta": {
"hash": {
"sha256": "15bf3c6af3ec315358a0217481a13285f95fc742bb5db8a1f934e0d1c3d7d5e2"
},
"pipfile-spec": 6,
"requires": {
"python_version": "3.8"
},
"sources": [
{
"name": "pypi",
"url": "https://pypi.org/simple",
"verify_ssl": true
}
]
},
"default": {
"asgiref": {
"hashes": [
"sha256:5ee950735509d04eb673bd7f7120f8fa1c9e2df495394992c73234d526907e17",
"sha256:7162a3cb30ab0609f1a4c95938fd73e8604f63bdba516a7f7d64b83ff09478f0"
],
"markers": "python_version >= '3.5'",
"version": "==3.3.1"
},
"beautifulsoup4": {
"hashes": [
"sha256:4c98143716ef1cb40bf7f39a8e3eec8f8b009509e74904ba3a7b315431577e35",
"sha256:84729e322ad1d5b4d25f805bfa05b902dd96450f43842c4e99067d5e1369eb25",
"sha256:fff47e031e34ec82bf17e00da8f592fe7de69aeea38be00523c04623c04fb666"
],
"version": "==4.9.3"
},
"bs4": {
"hashes": [
"sha256:36ecea1fd7cc5c0c6e4a1ff075df26d50da647b75376626cc186e2212886dd3a"
],
"index": "pypi",
"version": "==0.0.1"
},
"certifi": {
"hashes": [
"sha256:1a4995114262bffbc2413b159f2a1a480c969de6e6eb13ee966d470af86af59c",
"sha256:719a74fb9e33b9bd44cc7f3a8d94bc35e4049deebe19ba7d8e108280cfd59830"
],
"version": "==2020.12.5"
},
"chardet": {
"hashes": [
"sha256:0d6f53a15db4120f2b08c94f11e7d93d2c911ee118b6b30a04ec3ee8310179fa",
"sha256:f864054d66fd9118f2e67044ac8981a54775ec5b67aed0441892edb553d21da5"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
"version": "==4.0.0"
},
"cssselect": {
"hashes": [
"sha256:f612ee47b749c877ebae5bb77035d8f4202c6ad0f0fc1271b3c18ad6c4468ecf",
"sha256:f95f8dedd925fd8f54edb3d2dfb44c190d9d18512377d3c1e2388d16126879bc"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==1.1.0"
},
"django": {
"hashes": [
"sha256:2d78425ba74c7a1a74b196058b261b9733a8570782f4e2828974777ccca7edf7",
"sha256:efa2ab96b33b20c2182db93147a0c3cd7769d418926f9e9f140a60dca7c64ca9"
],
"markers": "python_version >= '3.6'",
"version": "==3.1.5"
},
"fuckit": {
"hashes": [
"sha256:059488e6aa2053da9db5eb5101e2498f608314da5118bf2385acb864568ccc25"
],
"index": "pypi",
"version": "==4.8.1"
},
"idna": {
"hashes": [
"sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6",
"sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==2.10"
},
"image": {
"hashes": [
"sha256:baa2e09178277daa50f22fd6d1d51ec78f19c12688921cb9ab5808743f097126"
],
"index": "pypi",
"version": "==1.5.33"
},
"lazyxml": {
"editable": true,
"git": "https://github.com/waynedyck/lazyxml.git",
"ref": "f42ea4a4febf4c1e120b05d6ca9cef42556a75d5"
},
"lxml": {
"hashes": [
"sha256:0448576c148c129594d890265b1a83b9cd76fd1f0a6a04620753d9a6bcfd0a4d",
"sha256:127f76864468d6630e1b453d3ffbbd04b024c674f55cf0a30dc2595137892d37",
"sha256:1471cee35eba321827d7d53d104e7b8c593ea3ad376aa2df89533ce8e1b24a01",
"sha256:2363c35637d2d9d6f26f60a208819e7eafc4305ce39dc1d5005eccc4593331c2",
"sha256:2e5cc908fe43fe1aa299e58046ad66981131a66aea3129aac7770c37f590a644",
"sha256:2e6fd1b8acd005bd71e6c94f30c055594bbd0aa02ef51a22bbfa961ab63b2d75",
"sha256:366cb750140f221523fa062d641393092813b81e15d0e25d9f7c6025f910ee80",
"sha256:42ebca24ba2a21065fb546f3e6bd0c58c3fe9ac298f3a320147029a4850f51a2",
"sha256:4e751e77006da34643ab782e4a5cc21ea7b755551db202bc4d3a423b307db780",
"sha256:4fb85c447e288df535b17ebdebf0ec1cf3a3f1a8eba7e79169f4f37af43c6b98",
"sha256:50c348995b47b5a4e330362cf39fc503b4a43b14a91c34c83b955e1805c8e308",
"sha256:535332fe9d00c3cd455bd3dd7d4bacab86e2d564bdf7606079160fa6251caacf",
"sha256:535f067002b0fd1a4e5296a8f1bf88193080ff992a195e66964ef2a6cfec5388",
"sha256:5be4a2e212bb6aa045e37f7d48e3e1e4b6fd259882ed5a00786f82e8c37ce77d",
"sha256:60a20bfc3bd234d54d49c388950195d23a5583d4108e1a1d47c9eef8d8c042b3",
"sha256:648914abafe67f11be7d93c1a546068f8eff3c5fa938e1f94509e4a5d682b2d8",
"sha256:681d75e1a38a69f1e64ab82fe4b1ed3fd758717bed735fb9aeaa124143f051af",
"sha256:68a5d77e440df94011214b7db907ec8f19e439507a70c958f750c18d88f995d2",
"sha256:69a63f83e88138ab7642d8f61418cf3180a4d8cd13995df87725cb8b893e950e",
"sha256:6e4183800f16f3679076dfa8abf2db3083919d7e30764a069fb66b2b9eff9939",
"sha256:6fd8d5903c2e53f49e99359b063df27fdf7acb89a52b6a12494208bf61345a03",
"sha256:791394449e98243839fa822a637177dd42a95f4883ad3dec2a0ce6ac99fb0a9d",
"sha256:7a7669ff50f41225ca5d6ee0a1ec8413f3a0d8aa2b109f86d540887b7ec0d72a",
"sha256:7e9eac1e526386df7c70ef253b792a0a12dd86d833b1d329e038c7a235dfceb5",
"sha256:7ee8af0b9f7de635c61cdd5b8534b76c52cd03536f29f51151b377f76e214a1a",
"sha256:8246f30ca34dc712ab07e51dc34fea883c00b7ccb0e614651e49da2c49a30711",
"sha256:8c88b599e226994ad4db29d93bc149aa1aff3dc3a4355dd5757569ba78632bdf",
"sha256:923963e989ffbceaa210ac37afc9b906acebe945d2723e9679b643513837b089",
"sha256:94d55bd03d8671686e3f012577d9caa5421a07286dd351dfef64791cf7c6c505",
"sha256:97db258793d193c7b62d4e2586c6ed98d51086e93f9a3af2b2034af01450a74b",
"sha256:a9d6bc8642e2c67db33f1247a77c53476f3a166e09067c0474facb045756087f",
"sha256:cd11c7e8d21af997ee8079037fff88f16fda188a9776eb4b81c7e4c9c0a7d7fc",
"sha256:d8d3d4713f0c28bdc6c806a278d998546e8efc3498949e3ace6e117462ac0a5e",
"sha256:e0bfe9bb028974a481410432dbe1b182e8191d5d40382e5b8ff39cdd2e5c5931",
"sha256:f4822c0660c3754f1a41a655e37cb4dbbc9be3d35b125a37fab6f82d47674ebc",
"sha256:f83d281bb2a6217cd806f4cf0ddded436790e66f393e124dfe9731f6b3fb9afe",
"sha256:fc37870d6716b137e80d19241d0e2cff7a7643b925dfa49b4c8ebd1295eb506e"
],
"index": "pypi",
"version": "==4.6.2"
},
"pillow": {
"hashes": [
"sha256:165c88bc9d8dba670110c689e3cc5c71dbe4bfb984ffa7cbebf1fac9554071d6",
"sha256:1d208e670abfeb41b6143537a681299ef86e92d2a3dac299d3cd6830d5c7bded",
"sha256:22d070ca2e60c99929ef274cfced04294d2368193e935c5d6febfd8b601bf865",
"sha256:2353834b2c49b95e1313fb34edf18fca4d57446675d05298bb694bca4b194174",
"sha256:39725acf2d2e9c17356e6835dccebe7a697db55f25a09207e38b835d5e1bc032",
"sha256:3de6b2ee4f78c6b3d89d184ade5d8fa68af0848f9b6b6da2b9ab7943ec46971a",
"sha256:47c0d93ee9c8b181f353dbead6530b26980fe4f5485aa18be8f1fd3c3cbc685e",
"sha256:5e2fe3bb2363b862671eba632537cd3a823847db4d98be95690b7e382f3d6378",
"sha256:604815c55fd92e735f9738f65dabf4edc3e79f88541c221d292faec1904a4b17",
"sha256:6c5275bd82711cd3dcd0af8ce0bb99113ae8911fc2952805f1d012de7d600a4c",
"sha256:731ca5aabe9085160cf68b2dbef95fc1991015bc0a3a6ea46a371ab88f3d0913",
"sha256:7612520e5e1a371d77e1d1ca3a3ee6227eef00d0a9cddb4ef7ecb0b7396eddf7",
"sha256:7916cbc94f1c6b1301ac04510d0881b9e9feb20ae34094d3615a8a7c3db0dcc0",
"sha256:81c3fa9a75d9f1afafdb916d5995633f319db09bd773cb56b8e39f1e98d90820",
"sha256:887668e792b7edbfb1d3c9d8b5d8c859269a0f0eba4dda562adb95500f60dbba",
"sha256:93a473b53cc6e0b3ce6bf51b1b95b7b1e7e6084be3a07e40f79b42e83503fbf2",
"sha256:96d4dc103d1a0fa6d47c6c55a47de5f5dafd5ef0114fa10c85a1fd8e0216284b",
"sha256:a3d3e086474ef12ef13d42e5f9b7bbf09d39cf6bd4940f982263d6954b13f6a9",
"sha256:b02a0b9f332086657852b1f7cb380f6a42403a6d9c42a4c34a561aa4530d5234",
"sha256:b09e10ec453de97f9a23a5aa5e30b334195e8d2ddd1ce76cc32e52ba63c8b31d",
"sha256:b6f00ad5ebe846cc91763b1d0c6d30a8042e02b2316e27b05de04fa6ec831ec5",
"sha256:bba80df38cfc17f490ec651c73bb37cd896bc2400cfba27d078c2135223c1206",
"sha256:c3d911614b008e8a576b8e5303e3db29224b455d3d66d1b2848ba6ca83f9ece9",
"sha256:ca20739e303254287138234485579b28cb0d524401f83d5129b5ff9d606cb0a8",
"sha256:cb192176b477d49b0a327b2a5a4979552b7a58cd42037034316b8018ac3ebb59",
"sha256:cdbbe7dff4a677fb555a54f9bc0450f2a21a93c5ba2b44e09e54fcb72d2bd13d",
"sha256:cf6e33d92b1526190a1de904df21663c46a456758c0424e4f947ae9aa6088bf7",
"sha256:d355502dce85ade85a2511b40b4c61a128902f246504f7de29bbeec1ae27933a",
"sha256:d673c4990acd016229a5c1c4ee8a9e6d8f481b27ade5fc3d95938697fa443ce0",
"sha256:dc577f4cfdda354db3ae37a572428a90ffdbe4e51eda7849bf442fb803f09c9b",
"sha256:dd9eef866c70d2cbbea1ae58134eaffda0d4bfea403025f4db6859724b18ab3d",
"sha256:f50e7a98b0453f39000619d845be8b06e611e56ee6e8186f7f60c3b1e2f0feae"
],
"markers": "python_version >= '3.6'",
"version": "==8.1.0"
},
"pyquery": {
"hashes": [
"sha256:1fc33b7699455ed25c75282bc8f80ace1ac078b0dda5a933dacbd8b1c1f83963",
"sha256:a388eefb6bc4a55350de0316fbd97cda999ae669b6743ae5b99102ba54f5aa72"
],
"index": "pypi",
"version": "==1.4.3"
},
"pytz": {
"hashes": [
"sha256:16962c5fb8db4a8f63a26646d8886e9d769b6c511543557bc84e9569fb9a9cb4",
"sha256:180befebb1927b16f6b57101720075a984c019ac16b1b7575673bea42c6c3da5"
],
"version": "==2020.5"
},
"requests": {
"hashes": [
"sha256:27973dd4a904a4f13b263a19c866c13b92a39ed1c964655f025f3f8d3d75b804",
"sha256:c210084e36a42ae6b9219e00e48287def368a26d03a048ddad7bfee44f75871e"
],
"index": "pypi",
"version": "==2.25.1"
},
"six": {
"hashes": [
"sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259",
"sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==1.15.0"
},
"soupsieve": {
"hashes": [
"sha256:4bb21a6ee4707bf43b61230e80740e71bfe56e55d1f1f50924b087bb2975c851",
"sha256:6dc52924dc0bc710a5d16794e6b3480b2c7c08b07729505feab2b2c16661ff6e"
],
"markers": "python_version >= '3.0'",
"version": "==2.1"
},
"sqlparse": {
"hashes": [
"sha256:017cde379adbd6a1f15a61873f43e8274179378e95ef3fede90b5aa64d304ed0",
"sha256:0f91fd2e829c44362cbcfab3e9ae12e22badaa8a29ad5ff599f9ec109f0454e8"
],
"markers": "python_version >= '3.5'",
"version": "==0.4.1"
},
"tenacity": {
"hashes": [
"sha256:baed357d9f35ec64264d8a4bbf004c35058fad8795c5b0d8a7dc77ecdcbb8f39",
"sha256:e14d191fb0a309b563904bbc336582efe2037de437e543b38da749769b544d7f"
],
"index": "pypi",
"version": "==6.3.1"
},
"urllib3": {
"hashes": [
"sha256:19188f96923873c92ccb987120ec4acaa12f0461fa9ce5d3d0772bc965a39e08",
"sha256:d8ff90d979214d7b4f8ce956e80f4028fc6860e4431f731ea4a8c08f23f99473"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'",
"version": "==1.26.2"
}
},
"develop": {}
}

README.md (1 line changed; mode Executable file → Normal file)

@ -246,7 +246,6 @@ update_check=1
 You can search for movie files by suffix under several parent directories that contain movie folders, then cut them into the same directory as the program

 ## Multi-episode movies
-**It is recommended to merge the parts into a single video file first**
 Multi-part movies can be named with an episode suffix such as ```ssni-xxx-cd1.mp4, ssni-xxx-cd2.mp4, abp-xxx-CD1.mp4```; as long as the name contains a ```-CDn./-cdn.```-style marker, the multi-part feature is used

 ## Chinese subtitles


@ -1,41 +0,0 @@
/Volumes/Adult/Files/ノ瀬アメリ/Tokyo Hot N0646.avi
/Volumes/Adult/Files/ノ瀬アメリ/MKBD_S03-MaRieS.mp4
/Volumes/192.168.2.100/Adult/Files/Aki Sasaki Megapack/HODV-21299.mkv
/Volumes/Adult/Files/[Tokyo-Hot] [n1180] 美人秘書3穴串刺奉仕残業 (中井綾香 Ayaka Nakai)/(Tokyo-Hot)(n1180)美人秘書3穴串刺奉仕残業 中井綾香.mp4
/mcdv47.avi
/mcdv-47.avi
/mcdv-047.mp4
/mcdv047.mp4
/mcdv0047.mp4
/1pondo-070409_621.mp4
/Volumes/Adult/Files/Kirara Asuka (@明日花キララ) FHD Pack Vol#1(181222)@RUNBKK/No-Watermarked/HOBD00015.FHD2.wmv
/Volumes/Adult/Files/(1.18TB) Julia movie pack collection Part 1/720p/RBD-406_1.mp4
/Volumes/Adult/Files/(1.18TB) Julia movie pack collection Part 1/720p/MDYD-664B.mp4
/Volumes/Adult/Files/107NTTR-037A.mp4
/Volumes/Adult/Files/Yua.Mikami-PML/SNIS-986 国民的アイドル アドレナリン大爆発禁欲1ヶ月後の性欲剥き出し焦らされトランスFUCK 三上悠亜【桃花族】.mp4
/Volumes/Adult/Files/(1.18TB) Julia movie pack collection Part 2/FHD/UPSM-109_2.mkv
/Volumes/Adult/Files/Kirara Asuka (@明日花キララ) FHD Pack Vol#2(181231)@RUNBKK/No-Watermarked/PPT003.SD3.wmv
/Volumes/Adult/Files/波多野结衣/THE波多野結衣 ぶっかけ50連発 CD1.wmv
/Volumes/Adult/Files/波多野结衣/欲しがり 後編 波多野結衣.wmv
/Volumes/Adult/Files/波多野结衣/欲しがり 前編 波多野結衣.wmv
/Volumes/Adult/Files/波多野结衣/加勒比 062212-055 夫の目の前で妻が ~元上司に縛られて~波多野結衣~.rmvb
/Volumes/Adult/Files/波多野结衣/022213-271-carib-whole_s.mp4
/Volumes/Adult/Files/SKYHD-001~010/SKYHD-009_H265.mkv
/Volumes/Adult/Files/大桥步兵合集/LAFBD-41.LaForet.Girl.41.angel.and.devil.Miku.Ohashi.2015.Bluray.1080p.x264.ac3-MTeam.mkv
/Volumes/Adult/Files/大桥步兵合集/032015_161-caribpr-high.mp4
/Volumes/Adult/Files/桃谷绘里香(桃谷エリカ) 所有作品集合/118ppt00016hhb2.mkv
/Volumes/Adult/Files/tia/soe935C.HD.wmv
/Volumes/Adult/Files/SKYHD-011~020/SKYHD-020_H265.mkv
/Volumes/Adult/Files/RION(りおん).Utsunomiya.Shion.宇都宮しをん(うつのみやしをん)/VR/sivr00008_E.mp4
/Volumes/Adult/Files/RION(りおん).Utsunomiya.Shion.宇都宮しをん(うつのみやしをん)/DMM.Video/onsd00899hhb3.mp4
/Volumes/Adult/Files/Rating Top 30 JAV pack/SHKD-744 営業課長の湿ったパンスト 里美ゆりあ.mp4
/Volumes/Adult/Files/Rating Top 30 JAV pack/ABP-627 裏・鈴村あいり-鈴村あいりのオトナの激情SEX4本番 鈴村あいり.MP4
/Volumes/Adult/Files/Rating Top 30 JAV pack/20 ABP-408 上原瑞穂/上原瑞穂 ABP-408 无码流出片段/[ThZu.Cc]20150909164411.m2ts
/Volumes/Adult/Files/Caribbean-101717-520-HD/100917-515/100917-515-carib-1080p.mp4
/Volumes/Adult/Files/ノ瀬アメリ/20081105栗栖エリカ - Sky Angel Blue 10 天舞超絕美少女天使降臨(skyhd010)(中文字幕).avi
/Volumes/Adult/Files/ノ瀬アメリ/一ノ瀬アメリ~加勒比 VERY SEXY.wmv
/Volumes/Adult/Files/ノ瀬アメリ/20101202一瀬アメリ - 東京ブルドック05(inu006).avi
/Volumes/Adult/Files/ノ瀬アメリ/Sky Angel Vol 80 - CD2.mp4
/Volumes/Adult/Files/Mika Sumire すみれ美香/Caribbean-091818-755.mp4
/Volumes/Adult/Files/Takizawa Rola/[HD]abp-031C.wmv
/Volumes/Adult/Files/Takizawa Rola/ABP-013HDA.wmv


@ -1,51 +0,0 @@
/Volumes/192.168.2.100/Adult/Files/Aki Sasaki Megapack/HODV-21222.mkv
/Volumes/Adult/Files/ノ瀬アメリ/Tokyo Hot N0646.avi
/Volumes/Adult/Files/ノ瀬アメリ/MKBD_S03-MaRieS.mp4
/Volumes/192.168.2.100/Adult/Files/RION(りおん).Utsunomiya.Shion.宇都宮しをん(うつのみやしをん)/DMM.Video/onsd00899hhb3.mp4
/Volumes/192.168.2.100/Adult/Files/Rating Top 30 JAV pack/IPTD-999-1 彼女の姉貴とイケナイ関係 Rio.wmv
/Volumes/192.168.2.100/Adult/Files/Rating Top 30 JAV pack/IPTD-999A 彼女の姉貴とイケナイ関係 Rio.wmv
/Volumes/192.168.2.100/Adult/Files/Rating Top 30 JAV pack/IPTD-999-A 彼女の姉貴とイケナイ関係 Rio.wmv
/Volumes/192.168.2.100/Adult/Files/Rating Top 30 JAV pack/IPTD-999-C 彼女の姉貴とイケナイ関係 Rio.wmv
/Volumes/192.168.2.100/Adult/Files/Rating Top 30 JAV pack/IPTD-999-B 彼女の姉貴とイケナイ関係 Rio.wmv
/Volumes/192.168.2.100/Adult/Files/tia/soe935C.HD.wmv
/Volumes/192.168.2.100/Adult/Files/tia/soe935B.HD.wmv
/Volumes/192.168.2.100/Adult/Files/tia/soe935A.HD.wmv
/Volumes/192.168.2.100/Adult/Files/tia/soe935D.HD.wmv
/Volumes/Adult/Files/大桥步兵合集/LAFBD-41.LaForet.Girl.41.angel.and.devil.Miku.Ohashi.2015.Bluray.1080p.x264.ac3-MTeam.mkv
/Volumes/Adult/Files/[Tokyo-Hot] [n1180] 美人秘書3穴串刺奉仕残業 (中井綾香 Ayaka Nakai)/(Tokyo-Hot)(n1180)美人秘書3穴串刺奉仕残業 中井綾香.mp4
/mcdv47.avi
/mcdv-47.avi
/mcdv-047.mp4
/mcdv047.mp4
/mcdv0047.mp4
/1pondo-070409_621.mp4
/Volumes/Adult/Files/Kirara Asuka (@明日花キララ) FHD Pack Vol#1(181222)@RUNBKK/No-Watermarked/HOBD00015.FHD2.wmv
/Volumes/Adult/Files/(1.18TB) Julia movie pack collection Part 1/720p/RBD-406_1.mp4
/Volumes/Adult/Files/(1.18TB) Julia movie pack collection Part 1/720p/MDYD-664B.mp4
/Volumes/Adult/Files/107NTTR-037A.mp4
/Volumes/Adult/Files/Yua.Mikami-PML/SNIS-986 国民的アイドル アドレナリン大爆発禁欲1ヶ月後の性欲剥き出し焦らされトランスFUCK 三上悠亜【桃花族】.mp4
/Volumes/Adult/Files/(1.18TB) Julia movie pack collection Part 2/FHD/UPSM-109_2.mkv
/Volumes/Adult/Files/Kirara Asuka (@明日花キララ) FHD Pack Vol#2(181231)@RUNBKK/No-Watermarked/PPT003.SD3.wmv
/Volumes/Adult/Files/波多野结衣/THE波多野結衣 ぶっかけ50連発 CD1.wmv
/Volumes/Adult/Files/波多野结衣/欲しがり 後編 波多野結衣.wmv
/Volumes/Adult/Files/波多野结衣/欲しがり 前編 波多野結衣.wmv
/Volumes/Adult/Files/波多野结衣/加勒比 062212-055 夫の目の前で妻が ~元上司に縛られて~波多野結衣~.rmvb
/Volumes/Adult/Files/波多野结衣/022213-271-carib-whole_s.mp4
/Volumes/Adult/Files/SKYHD-001~010/SKYHD-009_H265.mkv
/Volumes/Adult/Files/大桥步兵合集/LAFBD-41.LaForet.Girl.41.angel.and.devil.Miku.Ohashi.2015.Bluray.1080p.x264.ac3-MTeam.mkv
/Volumes/Adult/Files/大桥步兵合集/032015_161-caribpr-high.mp4
/Volumes/Adult/Files/桃谷绘里香(桃谷エリカ) 所有作品集合/118ppt00016hhb2.mkv
/Volumes/Adult/Files/SKYHD-011~020/SKYHD-020_H265.mkv
/Volumes/Adult/Files/RION(りおん).Utsunomiya.Shion.宇都宮しをん(うつのみやしをん)/VR/sivr00008_E.mp4
/Volumes/Adult/Files/RION(りおん).Utsunomiya.Shion.宇都宮しをん(うつのみやしをん)/DMM.Video/onsd00899hhb3.mp4
/Volumes/Adult/Files/Rating Top 30 JAV pack/SHKD-744 営業課長の湿ったパンスト 里美ゆりあ.mp4
/Volumes/Adult/Files/Rating Top 30 JAV pack/ABP-627 裏・鈴村あいり-鈴村あいりのオトナの激情SEX4本番 鈴村あいり.MP4
/Volumes/Adult/Files/Rating Top 30 JAV pack/20 ABP-408 上原瑞穂/上原瑞穂 ABP-408 无码流出片段/[ThZu.Cc]20150909164411.m2ts
/Volumes/Adult/Files/Caribbean-101717-520-HD/100917-515/100917-515-carib-1080p.mp4
/Volumes/Adult/Files/ノ瀬アメリ/20081105栗栖エリカ - Sky Angel Blue 10 天舞超絕美少女天使降臨(skyhd010)(中文字幕).avi
/Volumes/Adult/Files/ノ瀬アメリ/一ノ瀬アメリ~加勒比 VERY SEXY.wmv
/Volumes/Adult/Files/ノ瀬アメリ/20101202一瀬アメリ - 東京ブルドック05(inu006).avi
/Volumes/Adult/Files/ノ瀬アメリ/Sky Angel Vol 80 - CD2.mp4
/Volumes/Adult/Files/Mika Sumire すみれ美香/Caribbean-091818-755.mp4
/Volumes/Adult/Files/Takizawa Rola/[HD]abp-031C.wmv
/Volumes/Adult/Files/Takizawa Rola/ABP-013HDA.wmv


@ -1,50 +0,0 @@
/Volumes/Adult/Files/Kirara Asuka (@明日花キララ) FHD Pack Vol#1(181222)@RUNBKK/No-Watermarked/HOBD00015.FHD2.wmv
/1pondo-070409_621.mp4
/Volumes/Adult/Files/107NTTR-037.mp4
/Volumes/Adult/Files/107NTTR-037A.mp4
/Volumes/Adult/Files/Yua.Mikami-PML/TEK-097 ふたりは無敵.wmv
/Volumes/Adult/Files/Yua.Mikami-PML/SNIS-986 国民的アイドル アドレナリン大爆発禁欲1ヶ月後の性欲剥き出し焦らされトランスFUCK 三上悠亜【桃花族】.mp4
/Volumes/Adult/Files/Yua.Mikami-PML/SSNI-030 三上悠亜ファン感謝祭 国民的アイドル×一般ユーザー20人ガチファンとSEX解禁ハメまくりスペシャル【桃花族】.mp4
/Volumes/Adult/Files/(1.18TB) Julia movie pack collection Part 2/FHD/MIDD-893A.mkv
/Volumes/Adult/Files/(1.18TB) Julia movie pack collection Part 2/FHD/UPSM-109_2.mkv
/Volumes/Adult/Files/Kirara Asuka (@明日花キララ) FHD Pack Vol#2(181231)@RUNBKK/No-Watermarked/PPT003.SD3.wmv
/Volumes/Adult/Files/波多野结衣/THE波多野結衣 ぶっかけ50連発 CD1.wmv
/Volumes/Adult/Files/波多野结衣/欲しがり 後編 波多野結衣.wmv
/Volumes/Adult/Files/波多野结衣/欲しがり 前編 波多野結衣.wmv
/Volumes/Adult/Files/波多野结衣/加勒比 062212-055 夫の目の前で妻が ~元上司に縛られて~波多野結衣~.rmvb
/Volumes/Adult/Files/波多野结衣/022213-271-carib-whole_s.mp4
/Volumes/Adult/Files/桜木凛 Rin Sakuragi FHD Collection Pack Vol/BBI-183.wmv
/Volumes/Adult/Files/NOP-019 芭蕾教室 水嶋あずみ/NOP019B.HD.wmv
/Volumes/Adult/Files/一ノ瀬アメリ part2/栗栖エリカ/20081105栗栖エリカ - Sky Angel Blue 10 天舞超絕美少女天使降臨(skyhd010)(中文字幕).avi
/Volumes/Adult/Files/一ノ瀬アメリ part2/Max Girls/Max Girls 24(xv804)伊東遥,Rio,小沢アリス,葉月しおり,一ノ瀬アメリ,ひなた結衣,藤崎りお.avi
/Volumes/Adult/Files/一ノ瀬アメリ part2/瀬アメリAmeri Ichinose/20091127一瀬アメリ - 一見面就做愛(xv801).avi
/Volumes/Adult/Files/Aki Sasaki Megapack/MSTG-003.mkv
/Volumes/Adult/Files/SKYHD-001~010/SKYHD-009_H265.mkv
/Volumes/Adult/Files/大桥步兵合集/LAFBD-41.LaForet.Girl.41.angel.and.devil.Miku.Ohashi.2015.Bluray.1080p.x264.ac3-MTeam.mkv
/Volumes/Adult/Files/大桥步兵合集/032015_161-caribpr-high.mp4
/Volumes/Adult/Files/桃谷绘里香(桃谷エリカ) 所有作品集合/(PRESTIGE)(ABP-171)彼女のお姉さんは、誘惑ヤリたがり娘。桃谷エリカ.wmv
/Volumes/Adult/Files/桃谷绘里香(桃谷エリカ) 所有作品集合/(PRESTIGE)(ABP-145)濃密な接吻と欲情ベロキス性交 04 桃谷エリカ.wmv
/Volumes/Adult/Files/桃谷绘里香(桃谷エリカ) 所有作品集合/118ppt00016hhb2.mkv
/Volumes/Adult/Files/tia/soe935C.HD.wmv
/Volumes/Adult/Files/SKYHD-011~020/SKYHD-020_H265.mkv
/Volumes/Adult/Files/sakumomo1203-PML/IDBD-795 ももに夢中 2018年日本人にもっとも愛された女優桜空ももPREMIUM BOX8時間BEST.mp4
/Volumes/Adult/Files/sakumomo1203-PML/IDBD-768 Gカップグラビアアイドル桜空もも初ベスト 原石 2【桃花族】.mp4
/Volumes/Adult/Files/RION(りおん).Utsunomiya.Shion.宇都宮しをん(うつのみやしをん)/VR/sivr00008_E.mp4
/Volumes/Adult/Files/RION(りおん).Utsunomiya.Shion.宇都宮しをん(うつのみやしをん)/DMM.Video/onsd00899hhb3.mp4
/Volumes/Adult/Files/Rating Top 30 JAV pack/SHKD-744 営業課長の湿ったパンスト 里美ゆりあ.mp4
/Volumes/Adult/Files/Rating Top 30 JAV pack/ABP-627 裏・鈴村あいり-鈴村あいりのオトナの激情SEX4本番 鈴村あいり.MP4
/Volumes/Adult/Files/Rating Top 30 JAV pack/20 ABP-408 上原瑞穂/上原瑞穂 ABP-408 无码流出片段/[ThZu.Cc]20150909164411.m2ts
/Volumes/Adult/Files/Caribbean-101717-520-HD/100917-515/100917-515-carib-1080p.mp4
/Volumes/Adult/Files/Kirara Asuka (@明日花キララ) FHD Pack Vol#3(190119)@RUNBKK/No-Watermarked/SOE976.FHD3.wmv
/Volumes/Adult/Files/(1.18TB) Julia movie pack collection Part 1/720p/RBD-406_1.mp4
/Volumes/Adult/Files/(1.18TB) Julia movie pack collection Part 1/720p/MDYD-664B.mp4
/Volumes/Adult/Files/ノ瀬アメリ/20081105栗栖エリカ - Sky Angel Blue 10 天舞超絕美少女天使降臨(skyhd010)(中文字幕).avi
/Volumes/Adult/Files/ノ瀬アメリ/一ノ瀬アメリ~加勒比 VERY SEXY.wmv
/Volumes/Adult/Files/ノ瀬アメリ/20101202一瀬アメリ - 東京ブルドック05(inu006).avi
/Volumes/Adult/Files/ノ瀬アメリ/Sky Angel Vol 80 - CD2.mp4
/Volumes/Adult/Files/ノ瀬アメリ/20100226一瀬アメリ - OL Style 制服(xv827).avi
/Volumes/Adult/Files/Mika Sumire すみれ美香/Caribbean-091818-755.mp4
/Volumes/Adult/Files/[Tokyo-Hot] [n1180] 美人秘書3穴串刺奉仕残業 (中井綾香 Ayaka Nakai)/(Tokyo-Hot)(n1180)美人秘書3穴串刺奉仕残業 中井綾香.mp4
/Volumes/Adult/Files/Takizawa Rola/[HD]abp-031C.wmv
/Volumes/Adult/Files/Takizawa Rola/ABP-013HDA.wmv
/Volumes/Adult/Files/Uncensored Mosaic Removal Megapack/ADN-017(Asami Ogawa).mp4

SiteSource/avsox.py → avsox.py (229 lines changed; mode Executable file → Normal file)

@@ -1,116 +1,115 @@
import re
from lxml import etree
import json
from bs4 import BeautifulSoup
from ADC_function import *
# import sys
# import io
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)

def getActorPhoto(htmlcode):  # //*[@id="star_qdt"]/li/a/img
    soup = BeautifulSoup(htmlcode, 'lxml')
    a = soup.find_all(attrs={'class': 'avatar-box'})
    d = {}
    for i in a:
        l = i.img['src']
        t = i.span.get_text()
        p2 = {t: l}
        d.update(p2)
    return d

def getTitle(a):
    try:
        html = etree.fromstring(a, etree.HTMLParser())
        result = str(html.xpath('/html/body/div[2]/h3/text()')).strip(" ['']")  # [0]
        return result.replace('/', '')
    except:
        return ''

def getActor(a):  # //*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
    soup = BeautifulSoup(a, 'lxml')
    a = soup.find_all(attrs={'class': 'avatar-box'})
    d = []
    for i in a:
        d.append(i.span.get_text())
    return d

def getStudio(a):
    html = etree.fromstring(a, etree.HTMLParser())
    result1 = str(html.xpath('//p[contains(text(),"制作商: ")]/following-sibling::p[1]/a/text()')).strip(" ['']").replace("', '", ' ')
    return result1

def getRuntime(a):
    html = etree.fromstring(a, etree.HTMLParser())
    result1 = str(html.xpath('//span[contains(text(),"长度:")]/../text()')).strip(" ['分钟']")
    return result1

def getLabel(a):
    html = etree.fromstring(a, etree.HTMLParser())
    result1 = str(html.xpath('//p[contains(text(),"系列:")]/following-sibling::p[1]/a/text()')).strip(" ['']")
    return result1

def getNum(a):
    html = etree.fromstring(a, etree.HTMLParser())
    result1 = str(html.xpath('//span[contains(text(),"识别码:")]/../span[2]/text()')).strip(" ['']")
    return result1

def getYear(release):
    try:
        result = str(re.search('\d{4}', release).group())
        return result
    except:
        return release

def getRelease(a):
    html = etree.fromstring(a, etree.HTMLParser())
    result1 = str(html.xpath('//span[contains(text(),"发行时间:")]/../text()')).strip(" ['']")
    return result1

def getCover(htmlcode):
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    result = str(html.xpath('/html/body/div[2]/div[1]/div[1]/a/img/@src')).strip(" ['']")
    return result

def getCover_small(htmlcode):
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    result = str(html.xpath('//*[@id="waterfall"]/div/a/div[1]/img/@src')).strip(" ['']")
    return result

def getTag(a):  # get genre tags
    soup = BeautifulSoup(a, 'lxml')
    a = soup.find_all(attrs={'class': 'genre'})
    d = []
    for i in a:
        d.append(i.get_text())
    return d

def main(number):
-    url = 'https://avsox.host/cn/search/' + number
-    a = get_html(url)
+    a = get_html('https://avsox.host/cn/search/' + number)
    html = etree.fromstring(a, etree.HTMLParser())
    result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
    if result1 == '' or result1 == 'null' or result1 == 'None':
        a = get_html('https://avsox.host/cn/search/' + number.replace('-', '_'))
        print(a)
        html = etree.fromstring(a, etree.HTMLParser())
        result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
        if result1 == '' or result1 == 'null' or result1 == 'None':
            a = get_html('https://avsox.host/cn/search/' + number.replace('_', ''))
            print(a)
            html = etree.fromstring(a, etree.HTMLParser())
            result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
    web = get_html(result1)
    soup = BeautifulSoup(web, 'lxml')
    info = str(soup.find(attrs={'class': 'row movie'}))
    dic = {
        'actor': getActor(web),
        'title': getTitle(web).strip(getNum(web)),
        'studio': getStudio(info),
        'outline': '',
        'runtime': getRuntime(info),
        'director': '',
        'release': getRelease(info),
        'number': getNum(info),
        'cover': getCover(web),
        'cover_small': getCover_small(a),
        'imagecut': 3,
        'tag': getTag(web),
        'label': getLabel(info),
        'year': getYear(getRelease(info)),  # str(re.search('\d{4}',getRelease(a)).group()),
        'actor_photo': getActorPhoto(web),
        'website': result1,
        'source': 'avsox.py',
    }
    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )  # .encode('UTF-8')
    return js

#print(main('012717_472'))
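
For reference, a minimal usage sketch of this scraper (assuming ADC_function and its get_html() are importable and the avsox mirror is reachable; the number is the one from the commented example above):

import json
import avsox

metadata = json.loads(avsox.main('012717_472'))  # main() returns a JSON string
print(metadata['number'], metadata['release'], metadata['cover'])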

29
config.ini Executable file → Normal file

@@ -1,35 +1,28 @@
[common]
-main_mode=2
-# All paths are absolute; do not include characters such as " or '
-search_folder= /Volumes/192.168.2.100/Adult/AVTest
-# If failed_output_folder is empty, videos whose metadata cannot be scraped will not be moved
-failed_output_folder= /Volumes/192.168.2.100/Adult/UnknownStars
-success_output_folder= /Volumes/192.168.2.100/Adult/Files
-# Temporary storage path for assets such as xxx.nfo and poster images
-temp_folder= /Volumes/192.168.2.100/Adult/temp
-# For remotely mounted volumes it is best not to enable soft links: they point at absolute paths, and the path on the remote NAS usually differs from the local mount path.
+main_mode=1
+failed_output_folder=failed
+success_output_folder=JAV_output
soft_link=0
[proxy]
-# Example is a SOCKS proxy config; the value after = may be left empty
-proxy= socks5h://127.0.0.1:1081
-timeout= 10
-retry= 5
+proxy=127.0.0.1:1080
+timeout=10
+retry=3
[Name_Rule]
-location_rule= actor+'/'+number
-naming_rule= number+'-'+title
+location_rule=actor+'/'+number
+naming_rule=number+'-'+title
[update]
update_check=1
[media]
+media_warehouse=emby
#emby or plex or kodi ,emby=jellyfin
-media_warehouse=EMBY
[escape]
literals=\()
-folders=/Volumes/Adult/UnknownStars,/Volumes/Adult/Stars
+folders=failed,JAV_output
[debug_mode]
switch=1
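
For orientation, a minimal sketch of how the [proxy] options above map onto a requests call (assumptions: the project's real retry logic lives in ADC_function and may differ, and a socks5h:// value additionally needs requests[socks] installed):

from configparser import ConfigParser
import requests

config = ConfigParser()
config.read('config.ini', encoding='UTF-8')
proxy = config.get('proxy', 'proxy')          # '127.0.0.1:1080' (2.8) or 'socks5h://...' (master)
timeout = config.getint('proxy', 'timeout')   # per-request timeout, in seconds
retry = config.getint('proxy', 'retry')       # attempts before giving up

if proxy and '://' not in proxy:
    proxy = 'http://' + proxy                 # bare host:port is treated as an HTTP proxy
proxies = {'http': proxy, 'https': proxy} if proxy else None

html = ''
for _ in range(retry):
    try:
        html = requests.get('https://www.javbus.com/', proxies=proxies, timeout=timeout).text
        break
    except requests.exceptions.RequestException:
        continue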

1607
core.py

File diff suppressed because it is too large

458
SiteSource/fanza.py → fanza.py Executable file → Normal file

@@ -1,229 +1,229 @@
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import json
import re
from lxml import etree
from ADC_function import *
# import sys
# import io
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)

def getTitle(text):
    html = etree.fromstring(text, etree.HTMLParser())
    result = html.xpath('//*[@id="title"]/text()')[0]
    return result

def getActor(text):
    # //*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
    html = etree.fromstring(text, etree.HTMLParser())
    result = (
        str(
            html.xpath(
                "//td[contains(text(),'出演者')]/following-sibling::td/span/a/text()"
            )
        )
        .strip(" ['']")
        .replace("', '", ",")
    )
    return result

def getStudio(text):
    html = etree.fromstring(text, etree.HTMLParser())
    try:
        result = html.xpath(
            "//td[contains(text(),'メーカー')]/following-sibling::td/a/text()"
        )[0]
    except:
        result = html.xpath(
            "//td[contains(text(),'メーカー')]/following-sibling::td/text()"
        )[0]
    return result

def getRuntime(text):
    html = etree.fromstring(text, etree.HTMLParser())
    result = html.xpath("//td[contains(text(),'収録時間')]/following-sibling::td/text()")[0]
    return re.search(r"\d+", str(result)).group()

def getLabel(text):
    html = etree.fromstring(text, etree.HTMLParser())
    try:
        result = html.xpath(
            "//td[contains(text(),'シリーズ:')]/following-sibling::td/a/text()"
        )[0]
    except:
        result = html.xpath(
            "//td[contains(text(),'シリーズ:')]/following-sibling::td/text()"
        )[0]
    return result

def getNum(text):
    html = etree.fromstring(text, etree.HTMLParser())
    try:
        result = html.xpath(
            "//td[contains(text(),'品番:')]/following-sibling::td/a/text()"
        )[0]
    except:
        result = html.xpath(
            "//td[contains(text(),'品番:')]/following-sibling::td/text()"
        )[0]
    return result

def getYear(getRelease):
    try:
        result = str(re.search(r"\d{4}", getRelease).group())
        return result
    except:
        return getRelease

def getRelease(text):
    html = etree.fromstring(text, etree.HTMLParser())
    try:
        result = html.xpath(
            "//td[contains(text(),'発売日:')]/following-sibling::td/a/text()"
        )[0].lstrip("\n")
    except:
        result = html.xpath(
            "//td[contains(text(),'発売日:')]/following-sibling::td/text()"
        )[0].lstrip("\n")
    return result

def getTag(text):
    html = etree.fromstring(text, etree.HTMLParser())
    try:
        result = html.xpath(
            "//td[contains(text(),'ジャンル:')]/following-sibling::td/a/text()"
        )
    except:
        result = html.xpath(
            "//td[contains(text(),'ジャンル:')]/following-sibling::td/text()"
        )
    return result

def getCover(text, number):
    html = etree.fromstring(text, etree.HTMLParser())
    cover_number = number
    try:
        result = html.xpath('//*[@id="' + cover_number + '"]/@href')[0]
    except:
        # sometimes fanza changes _ to \u005f in the image id
        if "_" in cover_number:
            cover_number = cover_number.replace("_", r"\u005f")
        try:
            result = html.xpath('//*[@id="' + cover_number + '"]/@href')[0]
        except:
            # (TODO) handle more edge cases
            # print(html)
            # raise an exception here, same behavior as before;
            # people's major requirement is fetching the picture
            raise ValueError("can not find image")
    return result

def getDirector(text):
    html = etree.fromstring(text, etree.HTMLParser())
    try:
        result = html.xpath(
            "//td[contains(text(),'監督:')]/following-sibling::td/a/text()"
        )[0]
    except:
        result = html.xpath(
            "//td[contains(text(),'監督:')]/following-sibling::td/text()"
        )[0]
    return result

def getOutline(text):
    html = etree.fromstring(text, etree.HTMLParser())
    try:
        result = str(html.xpath("//div[@class='mg-b20 lh4']/text()")[0]).replace(
            "\n", ""
        )
        if result == "":
            result = str(html.xpath("//div[@class='mg-b20 lh4']//p/text()")[0]).replace(
                "\n", ""
            )
    except:
        # (TODO) handle more edge cases
        # print(html)
        return ""
    return result

def main(number):
    # fanza allows letter + number + underscore; normalize the input here
    # @note: the only observed use of underscore is h_test123456789
    fanza_search_number = number
    # AV_Data_Capture.py.getNumber() over-formats the input; restore the h_ prefix
    if fanza_search_number.startswith("h-"):
        fanza_search_number = fanza_search_number.replace("h-", "h_")
    fanza_search_number = re.sub(r"[^0-9a-zA-Z_]", "", fanza_search_number).lower()
    fanza_urls = [
        "https://www.dmm.co.jp/digital/videoa/-/detail/=/cid=",
        "https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=",
        "https://www.dmm.co.jp/digital/anime/-/detail/=/cid=",
        "https://www.dmm.co.jp/mono/anime/-/detail/=/cid=",
    ]
    chosen_url = ""
    for url in fanza_urls:
        chosen_url = url + fanza_search_number
        htmlcode = get_html(chosen_url)
        if "404 Not Found" not in htmlcode:
            break
    if "404 Not Found" in htmlcode:
        return json.dumps({"title": "",})
    try:
        # for some old pages the input number does not match the page:
        # the url will be cid=test012 but the hinban on the page is test00012,
        # so get the hinban first and pass it to the following functions
        fanza_hinban = getNum(htmlcode)
        data = {
            "title": getTitle(htmlcode).strip(getActor(htmlcode)),
            "studio": getStudio(htmlcode),
            "outline": getOutline(htmlcode),
            "runtime": getRuntime(htmlcode),
            "director": getDirector(htmlcode) if "anime" not in chosen_url else "",
            "actor": getActor(htmlcode) if "anime" not in chosen_url else "",
            "release": getRelease(htmlcode),
            "number": fanza_hinban,
            "cover": getCover(htmlcode, fanza_hinban),
            "imagecut": 1,
            "tag": getTag(htmlcode),
            "label": getLabel(htmlcode),
            "year": getYear(getRelease(htmlcode)),  # str(re.search('\d{4}',getRelease(a)).group()),
            "actor_photo": "",
            "website": chosen_url,
            "source": "fanza.py",
        }
    except:
        data = {
            "title": "",
        }
    js = json.dumps(
        data, ensure_ascii=False, sort_keys=True, indent=4, separators=(",", ":")
    )  # .encode('UTF-8')
    return js

if __name__ == "__main__":
    # print(main("DV-1562"))
    # input("[+][+]Press enter key to exit; you can check the error message before you exit.")
    # print(main("ipx292"))
    pass
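
The normalization at the top of main() is the subtle part, so here is the same two-step logic (restore the h_ prefix, then whitelist characters) as a standalone sketch; the helper name is hypothetical:

import re

def normalize_fanza_cid(number):
    # restore the h_ prefix that upstream number formatting turns into h-
    if number.startswith("h-"):
        number = number.replace("h-", "h_")
    # fanza cids allow only letters, digits and underscore, lower-cased
    return re.sub(r"[^0-9a-zA-Z_]", "", number).lower()

assert normalize_fanza_cid("h-Test-00123") == "h_test00123"
assert normalize_fanza_cid("IPX-292") == "ipx292"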


@@ -1,162 +1,162 @@
import re
from lxml import etree  # needs install
import json
import ADC_function
# import sys
# import io
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)

def getTitle(htmlcode):  # get title
    #print(htmlcode)
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    result = str(html.xpath('/html/body/div[2]/div/div[1]/h3/text()')).strip(" ['']")
    result2 = str(re.sub('\D{2}2-\d+', '', result)).replace(' ', '', 1)
    #print(result2)
    return result2

def getActor(htmlcode):
    try:
        html = etree.fromstring(htmlcode, etree.HTMLParser())
        result = str(html.xpath('/html/body/div[2]/div/div[1]/h5[5]/a/text()')).strip(" ['']")
        return result
    except:
        return ''

def getStudio(htmlcode):  # get studio
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    result = str(html.xpath('/html/body/div[2]/div/div[1]/h5[3]/a[1]/text()')).strip(" ['']")
    return result

def getNum(htmlcode):  # get number
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[1]/span[2]/text()')).strip(" ['']")
    #print(result)
    return result

def getRelease(htmlcode2):
    #a=ADC_function.get_html('http://adult.contents.fc2.com/article_search.php?id='+str(number).lstrip("FC2-").lstrip("fc2-").lstrip("fc2_").lstrip("fc2-")+'&utm_source=aff_php&utm_medium=source_code&utm_campaign=from_aff_php')
    html = etree.fromstring(htmlcode2, etree.HTMLParser())
    result = str(html.xpath('//*[@id="container"]/div[1]/div/article/section[1]/div/div[2]/dl/dd[4]/text()')).strip(" ['']")
    return result

def getCover(htmlcode, number, htmlcode2):  # get cover
    #a = ADC_function.get_html('http://adult.contents.fc2.com/article_search.php?id=' + str(number).lstrip("FC2-").lstrip("fc2-").lstrip("fc2_").lstrip("fc2-") + '&utm_source=aff_php&utm_medium=source_code&utm_campaign=from_aff_php')
    html = etree.fromstring(htmlcode2, etree.HTMLParser())
    result = str(html.xpath('//*[@id="container"]/div[1]/div/article/section[1]/div/div[1]/a/img/@src')).strip(" ['']")
    if result == '':
        html = etree.fromstring(htmlcode, etree.HTMLParser())
        result2 = str(html.xpath('//*[@id="slider"]/ul[1]/li[1]/img/@src')).strip(" ['']")
        return 'https://fc2club.com' + result2
    return 'http:' + result

def getOutline(htmlcode2):  # get outline
    html = etree.fromstring(htmlcode2, etree.HTMLParser())
    result = str(html.xpath('/html/body/div[1]/div[2]/div[2]/div[1]/div/article/section[4]/p/text()')).strip(" ['']").replace("\\n", '', 10000).replace("'", '', 10000).replace(', ,', '').strip(' ').replace('。,', ',')
    return result

def getTag(htmlcode):  # get tags
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    result = str(html.xpath('/html/body/div[2]/div/div[1]/h5[4]/a/text()'))
    return result.strip(" ['']").replace("'", '').replace(' ', '')

def getYear(release):
    try:
        result = re.search('\d{4}', release).group()
        return result
    except:
        return ''

def getTitle_fc2com(htmlcode):  # get title
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    result = html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/h3/text()')[0]
    return result

def getActor_fc2com(htmlcode):
    try:
        html = etree.fromstring(htmlcode, etree.HTMLParser())
        result = html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/ul/li[3]/a/text()')[0]
        return result
    except:
        return ''

def getStudio_fc2com(htmlcode):  # get studio
    try:
        html = etree.fromstring(htmlcode, etree.HTMLParser())
        result = str(html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/ul/li[3]/a/text()')).strip(" ['']")
        return result
    except:
        return ''

def getNum_fc2com(htmlcode):  # get number
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[1]/span[2]/text()')).strip(" ['']")
    return result

def getRelease_fc2com(htmlcode2):
    html = etree.fromstring(htmlcode2, etree.HTMLParser())
    result = str(html.xpath('//*[@id="container"]/div[1]/div/article/section[1]/div/div[2]/dl/dd[4]/text()')).strip(" ['']")
    return result

def getCover_fc2com(htmlcode2):  # get cover
    html = etree.fromstring(htmlcode2, etree.HTMLParser())
    result = str(html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[1]/span/img/@src')).strip(" ['']")
    return 'http:' + result

def getOutline_fc2com(htmlcode2):  # get outline
    html = etree.fromstring(htmlcode2, etree.HTMLParser())
    result = str(html.xpath('/html/body/div/text()')).strip(" ['']").replace("\\n", '', 10000).replace("'", '', 10000).replace(', ,', '').strip(' ').replace('。,', ',')
    return result

def getTag_fc2com(number):  # get tags from the JSON tag API
    htmlcode = str(bytes(ADC_function.get_html('http://adult.contents.fc2.com/api/v4/article/' + number + '/tag?'), 'utf-8').decode('unicode-escape'))
    result = re.findall('"tag":"(.*?)"', htmlcode)
    return result

def getYear_fc2com(release):
    try:
        result = re.search('\d{4}', release).group()
        return result
    except:
        return ''

def main(number):
    try:
        htmlcode2 = ADC_function.get_html('https://adult.contents.fc2.com/article/' + number + '/')
        htmlcode = ADC_function.get_html('https://fc2club.com//html/FC2-' + number + '.html')
        actor = getActor(htmlcode)
        if getActor(htmlcode) == '':
            actor = 'FC2系列'
        dic = {
            'title': getTitle(htmlcode),
            'studio': getStudio(htmlcode),
            'year': '',  # str(re.search('\d{4}',getRelease(number)).group()),
            'outline': '',  # getOutline(htmlcode2),
            'runtime': getYear(getRelease(htmlcode)),
            'director': getStudio(htmlcode),
            'actor': actor,
            'release': getRelease(number),
            'number': 'FC2-' + number,
            'label': '',
            'cover': getCover(htmlcode, number, htmlcode2),
            'imagecut': 0,
            'tag': getTag(htmlcode),
            'actor_photo': '',
            'website': 'https://fc2club.com//html/FC2-' + number + '.html',
            'source': 'https://fc2club.com//html/FC2-' + number + '.html',
        }
        if dic['title'] == '':
            htmlcode2 = ADC_function.get_html('https://adult.contents.fc2.com/article/' + number + '/', cookies={'wei6H': '1'})
            actor = getActor(htmlcode)
            if getActor(htmlcode) == '':
                actor = 'FC2系列'
            dic = {
                'title': getTitle_fc2com(htmlcode2),
                'studio': getStudio_fc2com(htmlcode2),
                'year': '',  # str(re.search('\d{4}',getRelease(number)).group()),
                'outline': getOutline_fc2com(htmlcode2),
                'runtime': getYear_fc2com(getRelease(htmlcode2)),
                'director': getStudio_fc2com(htmlcode2),
                'actor': actor,
                'release': getRelease_fc2com(number),
                'number': 'FC2-' + number,
                'cover': getCover_fc2com(htmlcode2),
                'imagecut': 0,
                'tag': getTag_fc2com(number),
                'label': '',
                'actor_photo': '',
                'website': 'http://adult.contents.fc2.com/article/' + number + '/',
                'source': 'http://adult.contents.fc2.com/article/' + number + '/',
            }
    except Exception as e:
        # (TODO) handle this better
        # print(e)
        dic = {"title": ""}
    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'),)  # .encode('UTF-8')
    return js

#print(main('1252953'))
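
A condensed sketch of the two-source strategy main() implements: fc2club is the primary source, and the official article page (with the wei6H cookie main() already passes) is the fallback. The helper name and the unconditional second fetch are simplifications of the real control flow:

import ADC_function

def fetch_fc2_pages(number):
    # primary source: fc2club mirror
    club = ADC_function.get_html('https://fc2club.com//html/FC2-' + number + '.html')
    # fallback source: official article page, wei6H cookie as in main()
    official = ADC_function.get_html('https://adult.contents.fc2.com/article/' + number + '/',
                                     cookies={'wei6H': '1'})
    return club, official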


@@ -1,139 +1,138 @@
import re
from pyquery import PyQuery as pq  # needs install
from lxml import etree  # needs install
from bs4 import BeautifulSoup  # needs install
import json
from ADC_function import *

def getActorPhoto(htmlcode):  # //*[@id="star_qdt"]/li/a/img
    soup = BeautifulSoup(htmlcode, 'lxml')
    a = soup.find_all(attrs={'class': 'star-name'})
    d = {}
    for i in a:
        l = i.a['href']
        t = i.get_text()
        html = etree.fromstring(get_html(l), etree.HTMLParser())
        p = str(html.xpath('//*[@id="waterfall"]/div[1]/div/div[1]/img/@src')).strip(" ['']")
        p2 = {t: p}
        d.update(p2)
    return d

def getTitle(htmlcode):  # get title
    doc = pq(htmlcode)
    title = str(doc('div.container h3').text()).replace(' ', '-')
    try:
        title2 = re.sub('n\d+-', '', title)
        return title2
    except:
        return title

def getStudio(htmlcode):  # get studio
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[5]/a/text()')).strip(" ['']")
    return result

def getYear(htmlcode):  # get year
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[2]/text()')).strip(" ['']")
    return result

def getCover(htmlcode):  # get cover link
    doc = pq(htmlcode)
    image = doc('a.bigImage')
    return image.attr('href')

def getRelease(htmlcode):  # get release date
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[2]/text()')).strip(" ['']")
    return result

def getRuntime(htmlcode):  # get runtime in minutes
    soup = BeautifulSoup(htmlcode, 'lxml')
    a = soup.find(text=re.compile('分鐘'))
    return a

def getActor(htmlcode):  # get actresses
    b = []
    soup = BeautifulSoup(htmlcode, 'lxml')
    a = soup.find_all(attrs={'class': 'star-name'})
    for i in a:
        b.append(i.get_text())
    return b

def getNum(htmlcode):  # get number
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[1]/span[2]/text()')).strip(" ['']")
    return result

def getDirector(htmlcode):  # get director
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/a/text()')).strip(" ['']")
    return result

def getOutline(htmlcode):  # get outline
    doc = pq(htmlcode)
    result = str(doc('tr td div.mg-b20.lh4 p.mg-b20').text())
    return result

def getSerise(htmlcode):  # get series/label
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[7]/a/text()')).strip(" ['']")
    return result

def getTag(htmlcode):  # get genre tags
    tag = []
    soup = BeautifulSoup(htmlcode, 'lxml')
    a = soup.find_all(attrs={'class': 'genre'})
    for i in a:
        if 'onmouseout' in str(i):
            continue
        tag.append(i.get_text())
    return tag

def main(number):
    try:
        htmlcode = get_html('https://www.javbus.com/' + number)
        try:
            dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
        except:
            dww_htmlcode = ''
        dic = {
            'title': str(re.sub('\w+-\d+-', '', getTitle(htmlcode))),
            'studio': getStudio(htmlcode),
            'year': str(re.search('\d{4}', getYear(htmlcode)).group()),
            'outline': getOutline(dww_htmlcode),
            'runtime': getRuntime(htmlcode),
            'director': getDirector(htmlcode),
            'actor': getActor(htmlcode),
            'release': getRelease(htmlcode),
            'number': getNum(htmlcode),
            'cover': getCover(htmlcode),
            'imagecut': 1,
            'tag': getTag(htmlcode),
            'label': getSerise(htmlcode),
            'actor_photo': getActorPhoto(htmlcode),
            'website': 'https://www.javbus.com/' + number,
            'source': 'javbus.py',
        }
        js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )  # .encode('UTF-8')
        return js
    except:
        return main_uncensored(number)

-def main_uncensored(number):  # uncensored
+def main_uncensored(number):
    htmlcode = get_html('https://www.javbus.com/' + number)
    dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
    if getTitle(htmlcode) == '':
        htmlcode = get_html('https://www.javbus.com/' + number.replace('-', '_'))
        dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
    dic = {
-        'title': str(re.sub('\w+-\d+-', '', getTitle(htmlcode))).replace(getNum(htmlcode)+'-', ''),
+        'title': str(re.sub('\w+-\d+-','',getTitle(htmlcode))).replace(getNum(htmlcode)+'-',''),
        'studio': getStudio(htmlcode),
        'year': getYear(htmlcode),
        'outline': getOutline(dww_htmlcode),
        'runtime': getRuntime(htmlcode),
        'director': getDirector(htmlcode),
        'actor': getActor(htmlcode),
        'release': getRelease(htmlcode),
        'number': getNum(htmlcode),
        'cover': getCover(htmlcode),
        'tag': getTag(htmlcode),
        'label': getSerise(htmlcode),
        'imagecut': 0,
        'actor_photo': '',
        'website': 'https://www.javbus.com/' + number,
        'source': 'javbus.py',
    }
    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )  # .encode('UTF-8')
    return js
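
Since main() falls back to main_uncensored() on any scrape error, callers need only the one entry point. A usage sketch with a hypothetical number:

import json
import javbus

info = json.loads(javbus.main('ABP-123'))  # hypothetical number; the uncensored
                                           # path is tried automatically on failure
if info.get('title'):
    print(info['number'], info['title'])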


@@ -1,123 +1,123 @@
import re
from lxml import etree
import json
from bs4 import BeautifulSoup
from ADC_function import *
# import sys
# import io
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)

def getTitle(a):
    html = etree.fromstring(a, etree.HTMLParser())
    result = html.xpath("/html/body/section/div/h2/strong/text()")[0]
    return result

def getActor(a):  # //*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
    html = etree.fromstring(a, etree.HTMLParser())
    result1 = str(html.xpath('//strong[contains(text(),"演員")]/../following-sibling::span/text()')).strip(" ['']")
    result2 = str(html.xpath('//strong[contains(text(),"演員")]/../following-sibling::span/a/text()')).strip(" ['']")
    return str(result1 + result2).strip('+').replace(",\\xa0", "").replace("'", "").replace(' ', '').replace(',,', '').lstrip(',').replace(',', ', ')

def getActorPhoto(actor):
    a = actor.split(',')
    d = {}
    for i in a:
        p = {i: ''}
        d.update(p)
    return d

def getStudio(a):
    html = etree.fromstring(a, etree.HTMLParser())
    result1 = str(html.xpath('//strong[contains(text(),"片商")]/../following-sibling::span/text()')).strip(" ['']")
    result2 = str(html.xpath('//strong[contains(text(),"片商")]/../following-sibling::span/a/text()')).strip(" ['']")
    return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')

def getRuntime(a):
    html = etree.fromstring(a, etree.HTMLParser())
    result1 = str(html.xpath('//strong[contains(text(),"時長")]/../following-sibling::span/text()')).strip(" ['']")
    result2 = str(html.xpath('//strong[contains(text(),"時長")]/../following-sibling::span/a/text()')).strip(" ['']")
    return str(result1 + result2).strip('+').rstrip('mi')

def getLabel(a):
    html = etree.fromstring(a, etree.HTMLParser())
    result1 = str(html.xpath('//strong[contains(text(),"系列")]/../following-sibling::span/text()')).strip(" ['']")
    result2 = str(html.xpath('//strong[contains(text(),"系列")]/../following-sibling::span/a/text()')).strip(" ['']")
    return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')

def getNum(a):
    html = etree.fromstring(a, etree.HTMLParser())
    result1 = str(html.xpath('//strong[contains(text(),"番號")]/../following-sibling::span/text()')).strip(" ['']")
    result2 = str(html.xpath('//strong[contains(text(),"番號")]/../following-sibling::span/a/text()')).strip(" ['']")
    return str(result2 + result1).strip('+')

def getYear(getRelease):
    try:
        result = str(re.search('\d{4}', getRelease).group())
        return result
    except:
        return getRelease

def getRelease(a):
    html = etree.fromstring(a, etree.HTMLParser())
    result1 = str(html.xpath('//strong[contains(text(),"時間")]/../following-sibling::span/text()')).strip(" ['']")
    result2 = str(html.xpath('//strong[contains(text(),"時間")]/../following-sibling::span/a/text()')).strip(" ['']")
    return str(result1 + result2).strip('+')

def getTag(a):
    html = etree.fromstring(a, etree.HTMLParser())
    result1 = str(html.xpath('//strong[contains(text(),"类别")]/../following-sibling::span/text()')).strip(" ['']")
    result2 = str(html.xpath('//strong[contains(text(),"类别")]/../following-sibling::span/a/text()')).strip(" ['']")
    return str(result1 + result2).strip('+').replace(",\\xa0", "").replace("'", "").replace(' ', '').replace(',,', '').lstrip(',')

def getCover_small(a, index=0):
    # same issue mentioned below:
    # javdb sometimes returns multiple results,
    # so DO NOT just take the first one; use the one at the correct index
    html = etree.fromstring(a, etree.HTMLParser())
    result = html.xpath("//div[@class='item-image fix-scale-cover']/img/@src")[index]
    if not 'https' in result:
        result = 'https:' + result
    return result

def getCover(htmlcode):
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    result = str(html.xpath("//div[@class='column column-video-cover']/a/img/@src")).strip(" ['']")
    return result

def getDirector(a):
    html = etree.fromstring(a, etree.HTMLParser())
    result1 = str(html.xpath('//strong[contains(text(),"導演")]/../following-sibling::span/text()')).strip(" ['']")
    result2 = str(html.xpath('//strong[contains(text(),"導演")]/../following-sibling::span/a/text()')).strip(" ['']")
    return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')

def getOutline(htmlcode):
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    result = str(html.xpath('//*[@id="introduction"]/dd/p[1]/text()')).strip(" ['']")
    return result

def main(number):
    try:
        number = number.upper()
        query_result = get_html('https://javdb.com/search?q=' + number + '&f=all')
        html = etree.fromstring(query_result, etree.HTMLParser())
        # javdb sometimes returns multiple results,
        # and the first element may not be the one we are looking for,
        # so iterate over all candidates and find the matching one
        urls = html.xpath('//*[@id="videos"]/div/div/a/@href')
        ids = html.xpath('//*[@id="videos"]/div/div/a/div[contains(@class, "uid")]/text()')
        correct_url = urls[ids.index(number)]
        detail_page = get_html('https://javdb.com' + correct_url)
        dic = {
            'actor': getActor(detail_page),
            'title': getTitle(detail_page),
            'studio': getStudio(detail_page),
            'outline': getOutline(detail_page),
            'runtime': getRuntime(detail_page),
            'director': getDirector(detail_page),
            'release': getRelease(detail_page),
            'number': getNum(detail_page),
            'cover': getCover(detail_page),
            'cover_small': getCover_small(query_result, index=ids.index(number)),
            'imagecut': 3,
            'tag': getTag(detail_page),
            'label': getLabel(detail_page),
            'year': getYear(getRelease(detail_page)),  # str(re.search('\d{4}',getRelease(a)).group()),
            'actor_photo': getActorPhoto(getActor(detail_page)),
            'website': 'https://javdb.com' + correct_url,
            'source': 'javdb.py',
        }
    except Exception as e:
        # print(e)
        dic = {"title": ""}
    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )  # .encode('UTF-8')
    return js

# main('DV-1562')
# input("[+][+]Press enter key to exit; you can check the error message before you exit.")
#print(main('ipx-292'))
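
The uid-matching step in main() deserves a spelled-out example: the result links are paired with their uid labels and indexed by the query number instead of taking the first hit. A self-contained sketch with hypothetical data:

# hypothetical search results, shaped like the two xpath lists in main()
urls = ['/v/k1thing', '/v/a2bcde']
ids = ['IPX-291', 'IPX-292']
number = 'IPX-292'

correct_url = urls[ids.index(number)]  # ValueError when the number is absent,
                                       # which main() turns into {"title": ""}
assert correct_url == '/v/a2bcde'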


@ -1,108 +1,108 @@
import re
from lxml import etree
import json
from bs4 import BeautifulSoup
from ADC_function import *
# import sys
# import io
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)

# Each helper below extracts one metadata field from the mgstage product page.
# Most read the <td> next to a labelled <th> in the detail table, collecting
# both linked (<a>) values and bare text.
def getTitle(a):
    try:
        html = etree.fromstring(a, etree.HTMLParser())
        result = str(html.xpath('//*[@id="center_column"]/div[1]/h1/text()')).strip(" ['']")
        return result.replace('/', ',')
    except:
        return ''

def getActor(a):
    html = etree.fromstring(a, etree.HTMLParser())
    result1 = str(html.xpath('//th[contains(text(),"出演:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
    result2 = str(html.xpath('//th[contains(text(),"出演:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
    return str(result1 + result2).strip('+').replace("', '", '').replace('"', '').replace('/', ',')

# NOTE: getStudio, getLabel and getDirector all query the シリーズ (series) row
# of the detail table rather than dedicated maker/label/director rows.
def getStudio(a):
    html = etree.fromstring(a, etree.HTMLParser())
    result1 = str(html.xpath('//th[contains(text(),"シリーズ:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
    result2 = str(html.xpath('//th[contains(text(),"シリーズ:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
    return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')

def getRuntime(a):
    html = etree.fromstring(a, etree.HTMLParser())
    result1 = str(html.xpath('//th[contains(text(),"収録時間:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
    result2 = str(html.xpath('//th[contains(text(),"収録時間:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
    # rstrip('mi') trims trailing 'm'/'i' characters from the duration string
    return str(result1 + result2).strip('+').rstrip('mi')

def getLabel(a):
    html = etree.fromstring(a, etree.HTMLParser())
    result1 = str(html.xpath('//th[contains(text(),"シリーズ:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
    result2 = str(html.xpath('//th[contains(text(),"シリーズ:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
    return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')

def getNum(a):
    html = etree.fromstring(a, etree.HTMLParser())
    result1 = str(html.xpath('//th[contains(text(),"品番:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
    result2 = str(html.xpath('//th[contains(text(),"品番:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
    return str(result1 + result2).strip('+')

# NOTE: the parameter name shadows the getRelease() function defined below.
def getYear(getRelease):
    try:
        result = str(re.search('\d{4}', getRelease).group())
        return result
    except:
        return getRelease

def getRelease(a):
    html = etree.fromstring(a, etree.HTMLParser())
    result1 = str(html.xpath('//th[contains(text(),"配信開始日:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
    result2 = str(html.xpath('//th[contains(text(),"配信開始日:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
    return str(result1 + result2).strip('+')

def getTag(a):
    html = etree.fromstring(a, etree.HTMLParser())
    result1 = str(html.xpath('//th[contains(text(),"ジャンル:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
    result2 = str(html.xpath('//th[contains(text(),"ジャンル:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
    return str(result1 + result2).strip('+').replace("', '\\n", ",").replace("', '", "").replace('"', '')

def getCover(htmlcode):
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    result = str(html.xpath('//*[@id="center_column"]/div[1]/div[1]/div/div/h2/img/@src')).strip(" ['']")
    return result

def getDirector(a):
    html = etree.fromstring(a, etree.HTMLParser())
    result1 = str(html.xpath('//th[contains(text(),"シリーズ")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
    result2 = str(html.xpath('//th[contains(text(),"シリーズ")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
    return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')

def getOutline(htmlcode):
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    result = str(html.xpath('//*[@id="introduction"]/dd/p[1]/text()')).strip(" ['']")
    return result

def main(number2):
    number = number2.upper()
    # The adc=1 cookie passes mgstage's age-confirmation gate.
    htmlcode = str(get_html('https://www.mgstage.com/product/product_detail/' + str(number) + '/', cookies={'adc': '1'}))
    soup = BeautifulSoup(htmlcode, 'lxml')
    a = str(soup.find(attrs={'class': 'detail_data'})).replace('\n ', '').replace(' ', '').replace('\n ', '').replace('\n ', '')
    dic = {
        'title': getTitle(htmlcode).replace("\\n", '').replace(' ', ''),
        'studio': getStudio(a),
        'outline': getOutline(htmlcode),
        'runtime': getRuntime(a),
        'director': getDirector(a),
        'actor': getActor(a),
        'release': getRelease(a),
        'number': getNum(a),
        'cover': getCover(htmlcode),
        'imagecut': 0,
        'tag': getTag(a),
        'label': getLabel(a),
        'year': getYear(getRelease(a)),  # str(re.search('\d{4}',getRelease(a)).group()),
        'actor_photo': '',
        'website': 'https://www.mgstage.com/product/product_detail/' + str(number) + '/',
        'source': 'mgstage.py',
    }
    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'))  # .encode('UTF-8')
    return js

#print(main('SIRO-3607'))
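
Since main() serializes its result with json.dumps, callers get a JSON string back rather than a dict. A minimal usage sketch, not part of this diff: it assumes the module is importable as mgstage and that ADC_function.get_html performs the HTTP fetch, as the import above implies.

import json
from mgstage import main  # hypothetical import path; assumes the file is saved as mgstage.py

# main() returns a JSON string, so it must be parsed back into a dict.
metadata = json.loads(main('SIRO-3607'))
print(metadata['title'], metadata['number'], metadata['release'])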

Binary file not shown.

Binary file not shown.

Binary file not shown.

0
readme/This is readms.md's images folder Executable file → Normal file
View File

0
readme/flow_chart2.png Executable file → Normal file
View File

[image unchanged; 101 KiB before and after]

0
readme/readme1.PNG Executable file → Normal file
View File

[image unchanged; 1.1 KiB before and after]

0
readme/readme2.PNG Executable file → Normal file
View File

[image unchanged; 3.4 KiB before and after]

0
readme/readme3.PNG Executable file → Normal file
View File

[image unchanged; 1.3 KiB before and after]

0
readme/readme4.PNG Executable file → Normal file
View File

[image unchanged; 16 KiB before and after]

0
readme/single.gif Executable file → Normal file
View File

[image unchanged; 68 KiB before and after]
View File

@ -1 +0,0 @@
1

Binary file not shown. [deleted image; 101 KiB]

Binary file not shown. [deleted image; 1.1 KiB]

Binary file not shown. [deleted image; 3.4 KiB]

Binary file not shown. [deleted image; 1.3 KiB]

Binary file not shown. [deleted image; 16 KiB]

View File

@ -1 +0,0 @@
pipenv install -rlxml bs4 pillow pyquery

Binary file not shown. [deleted image; 68 KiB]

80
test.py
View File

@ -1,80 +0,0 @@
import os
import re
from itertools import groupby

import fuckit as fuckit
import pandas as pd
from tenacity import retry, stop_after_delay, wait_fixed

def go():
    # groupby() returns a single-pass iterator: the second loop prints nothing.
    a = [1, 2, 3, 4, 5, 6]
    # [print(x) for x in a]
    a1 = groupby(a, key=lambda k: (k / 2))
    for i in a1:
        print(i)
    for i in a1:
        print(i)

class TryDo:
    # Experimental retry-as-iterator: each next() call runs func once.
    def __init__(self, func, times=3):
        self.tries = times
        self.func = func
    def __iter__(self):
        self.currentTry = 1
        return self
    def __next__(self):
        if self.currentTry > self.tries:
            raise StopIteration(False)
        else:
            self.currentTry += 1
            self.func()
            raise StopIteration(True)
    # def do(self):

@retry(stop=stop_after_delay(3), wait=wait_fixed(2))
def stop_after_10_s():
    # tenacity retries every 2 s until 3 s have elapsed.
    print("Stopping after 10 seconds")
    raise Exception

# f = iter(TryDo(do_something, 5))
# stop_after_10_s()

def errorfunc():
    raise Exception

def okfunc():
    print("ok")

# with fuckit:
#     errorfunc()
#     okfunc()

# re.match()
# Lookbehind experiment: after the literal number 999, capture one trailing
# part letter (e.g. "-B") or one digit that is not followed by another.
r = re.search(r'(?<=999)-?((?P<alpha>([A-Z](?![A-Z])))|(?P<num>\d(?!\d)))', "IPTD-999-B-彼女の姉貴とイケナイ関係-RIO", re.I)
print(r.groupdict())
print(r.groupdict()['alpha'])
print(r.group(2))

line = "Cats are smarter than dogs"
matchObj = re.search(r'(?<=a)(.*) are (.*?) .*', line, re.M | re.I)
if matchObj:
    print("matchObj.group() : ", matchObj.group())
    print("matchObj.group(1) : ", matchObj.group(1))
    print("matchObj.group(2) : ", matchObj.group(2))
else:
    print("No match!!")

# print(r[-1])
# print(newList)
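
The lookbehind experiment above appears aimed at detecting a trailing part suffix (such as "-B" or "-2") after a movie number, but it hard-codes 999. A generalized sketch under that assumption; part_suffix is a hypothetical helper name, not code from this repository.

import re

def part_suffix(filename, number):
    # Hypothetical generalization of the '(?<=999)' experiment: after the
    # literal movie number, capture one trailing part letter or digit that
    # is not followed by another of the same kind.
    pattern = r'(?<=' + re.escape(number) + r')-?(?:(?P<alpha>[A-Z](?![A-Z]))|(?P<num>\d(?!\d)))'
    m = re.search(pattern, filename, re.I)
    return m.groupdict() if m else None

print(part_suffix('IPTD-999-B-RIO', '999'))   # {'alpha': 'B', 'num': None}
print(part_suffix('ABP-123-2.mp4', '123'))    # {'alpha': None, 'num': '2'}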

6
update_check.json Executable file → Normal file
View File

@ -1,5 +1,5 @@
{ {
"version": "2.8.2", "version": "2.8",
"version_show":"2.8.2", "version_show":"2.8",
"download": "https://github.com/yoshiko2/AV_Data_Capture/releases" "download": "https://github.com/yoshiko2/AV_Data_Capture/releases"
} }
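
update_check.json only carries the advertised version strings and a download URL; the checking logic itself is not part of this diff. A minimal sketch of a client-side check, assuming the file is fetched from the raw master branch (the URL is an assumption, not confirmed by this diff).

import requests

# Hypothetical URL; this diff does not show where the client fetches the file from.
CHECK_URL = 'https://raw.githubusercontent.com/yoshiko2/AV_Data_Capture/master/update_check.json'

def check_update(local_version):
    # Compare the local version against the advertised one and point at the release page.
    data = requests.get(CHECK_URL, timeout=10).json()
    if data['version'] != local_version:
        print('New version ' + data['version_show'] + ' available: ' + data['download'])

check_update('2.8')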