diff --git a/.gitignore b/.gitignore index 894a44c..cdc48c0 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +*.DS_Store # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..e7e9d11 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,2 @@ +# Default ignored files +/workspace.xml diff --git a/.idea/AV_Data_Capture.iml b/.idea/AV_Data_Capture.iml new file mode 100644 index 0000000..21f057a --- /dev/null +++ b/.idea/AV_Data_Capture.iml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/dictionaries/tanpengsccd.xml b/.idea/dictionaries/tanpengsccd.xml new file mode 100644 index 0000000..d7229c1 --- /dev/null +++ b/.idea/dictionaries/tanpengsccd.xml @@ -0,0 +1,19 @@ + + + + avsox + emby + fanart + fanza + javbus + javdb + jellyfin + khtml + kodi + mgstage + plex + pondo + rmvb + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..a4410bf --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,7 @@ + + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..9337de9 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/other.xml b/.idea/other.xml new file mode 100644 index 0000000..a708ec7 --- /dev/null +++ b/.idea/other.xml @@ -0,0 +1,6 @@ + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/ADC_function.py b/ADC_function.py index 596a9ea..04708d6 100755 --- a/ADC_function.py +++ b/ADC_function.py @@ -1,136 +1,127 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -import requests -from configparser import ConfigParser -import os -import re -import time -import sys -from lxml import etree -import sys -import io -# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True) -# sys.setdefaultencoding('utf-8') - -config_file='config.ini' -config = ConfigParser() - -if os.path.exists(config_file): - try: - config.read(config_file, encoding='UTF-8') - except: - print('[-]Config.ini read failed! 
Please use the offical file!') -else: - print('[+]config.ini: not found, creating...',end='') - with open("config.ini", "wt", encoding='UTF-8') as code: - print("[common]", file=code) - print("main_mode = 1", file=code) - print("failed_output_folder = failed", file=code) - print("success_output_folder = JAV_output", file=code) - print("", file=code) - print("[proxy]",file=code) - print("proxy=127.0.0.1:1081",file=code) - print("timeout=10", file=code) - print("retry=3", file=code) - print("", file=code) - print("[Name_Rule]", file=code) - print("location_rule=actor+'/'+number",file=code) - print("naming_rule=number+'-'+title",file=code) - print("", file=code) - print("[update]",file=code) - print("update_check=1",file=code) - print("", file=code) - print("[media]", file=code) - print("media_warehouse=emby", file=code) - print("#emby plex kodi", file=code) - print("", file=code) - print("[escape]", file=code) - print("literals=\\", file=code) - print("", file=code) - print("[movie_location]", file=code) - print("path=", file=code) - print("", file=code) - print('.',end='') - time.sleep(2) - print('.') - print('[+]config.ini: created!') - print('[+]Please restart the program!') - time.sleep(4) - os._exit(0) - try: - config.read(config_file, encoding='UTF-8') - except: - print('[-]Config.ini read failed! Please use the offical file!') - -def get_network_settings(): - try: - proxy = config["proxy"]["proxy"] - timeout = int(config["proxy"]["timeout"]) - retry_count = int(config["proxy"]["retry"]) - assert timeout > 0 - assert retry_count > 0 - except: - raise ValueError("[-]Proxy config error! Please check the config.") - return proxy, timeout, retry_count - -def getDataState(json_data): # 元数据获取失败检测 - if json_data['title'] == '' or json_data['title'] == 'None' or json_data['title'] == 'null': - return 0 - else: - return 1 - -def ReadMediaWarehouse(): - return config['media']['media_warehouse'] - -def UpdateCheckSwitch(): - check=str(config['update']['update_check']) - if check == '1': - return '1' - elif check == '0': - return '0' - elif check == '': - return '0' - -def getXpathSingle(htmlcode,xpath): - html = etree.fromstring(htmlcode, etree.HTMLParser()) - result1 = str(html.xpath(xpath)).strip(" ['']") - return result1 - -def get_html(url,cookies = None):#网页请求核心 - proxy, timeout, retry_count = get_network_settings() - i = 0 - while i < retry_count: - try: - if not proxy == '': - proxies = {"http": "http://" + proxy,"https": "https://" + proxy} - headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3100.0 Safari/537.36'} - getweb = requests.get(str(url), headers=headers, timeout=timeout,proxies=proxies, cookies=cookies) - getweb.encoding = 'utf-8' - return getweb.text - else: - headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'} - getweb = requests.get(str(url), headers=headers, timeout=timeout, cookies=cookies) - getweb.encoding = 'utf-8' - return getweb.text - except: - i += 1 - print('[-]Connect retry '+str(i)+'/'+str(retry_count)) - print('[-]Connect Failed! 
Please check your Proxy or Network!') - - -def post_html(url: str, query: dict) -> requests.Response: - proxy, timeout, retry_count = get_network_settings() - - if proxy: - proxies = {"http": "http://" + proxy, "https": "https://" + proxy} - else: - proxies = {} - - for i in range(retry_count): - try: - result = requests.post(url, data=query, proxies=proxies) - return result - except requests.exceptions.ProxyError: - print("[-]Connect retry {}/{}".format(i+1, retry_count)) - print("[-]Connect Failed! Please check your Proxy or Network!") +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import requests +from configparser import ConfigParser +import os +import re +import time +import sys +from lxml import etree +import sys +import io +from ConfigApp import ConfigApp +# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True) +# sys.setdefaultencoding('utf-8') + +# config_file='config.ini' +# config = ConfigParser() + +# if os.path.exists(config_file): +# try: +# config.read(config_file, encoding='UTF-8') +# except: +# print('[-]Config.ini read failed! Please use the offical file!') +# else: +# print('[+]config.ini: not found, creating...',end='') +# with open("config.ini", "wt", encoding='UTF-8') as code: +# print("[common]", file=code) +# print("main_mode = 1", file=code) +# print("failed_output_folder = failed", file=code) +# print("success_output_folder = JAV_output", file=code) +# print("", file=code) +# print("[proxy]",file=code) +# print("proxy=127.0.0.1:1081",file=code) +# print("timeout=10", file=code) +# print("retry=3", file=code) +# print("", file=code) +# print("[Name_Rule]", file=code) +# print("location_rule=actor+'/'+number",file=code) +# print("naming_rule=number+'-'+title",file=code) +# print("", file=code) +# print("[update]",file=code) +# print("update_check=1",file=code) +# print("", file=code) +# print("[media]", file=code) +# print("media_warehouse=emby", file=code) +# print("#emby plex kodi", file=code) +# print("", file=code) +# print("[escape]", file=code) +# print("literals=\\", file=code) +# print("", file=code) +# print("[movie_location]", file=code) +# print("path=", file=code) +# print("", file=code) +# print('.',end='') +# time.sleep(2) +# print('.') +# print('[+]config.ini: created!') +# print('[+]Please restart the program!') +# time.sleep(4) +# os._exit(0) +# try: +# config.read(config_file, encoding='UTF-8') +# except: +# print('[-]Config.ini read failed! Please use the offical file!') + +config = ConfigApp() + + +def get_network_settings(): + try: + proxy = config.proxy + timeout = int(config.timeout) + retry_count = int(config.retry) + assert timeout > 0 + assert retry_count > 0 + except: + raise ValueError("[-]Proxy config error! 
Please check the config.") + return proxy, timeout, retry_count + +def getDataState(json_data): # 元数据获取失败检测 + if json_data['title'] == '' or json_data['title'] == 'None' or json_data['title'] == 'null': + return 0 + else: + return 1 + +def ReadMediaWarehouse(): + return config.media_server + +def UpdateCheckSwitch(): + check=str(config.update_check) + if check == '1': + return '1' + elif check == '0': + return '0' + elif check == '': + return '0' + +def getXpathSingle(htmlcode,xpath): + html = etree.fromstring(htmlcode, etree.HTMLParser()) + result1 = str(html.xpath(xpath)).strip(" ['']") + return result1 + +def get_html(url,cookies = None):#网页请求核心 + proxy, timeout, retry_count = get_network_settings() + i = 0 + print(url) + while i < retry_count: + try: + if not proxy == '': + proxies = {"http": proxy, "https": proxy} + headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3100.0 Safari/537.36'} + getweb = requests.get(str(url), headers=headers, timeout=timeout, proxies=proxies, cookies=cookies) + getweb.encoding = 'utf-8' + return getweb.text + else: + headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'} + getweb = requests.get(str(url), headers=headers, timeout=timeout, cookies=cookies) + getweb.encoding = 'utf-8' + return getweb.text + except Exception as e: + print(e) + i += 1 + print('[-]Connect retry '+str(i)+'/'+str(retry_count)) + print('[-]Connect Failed! Please check your Proxy or Network!') + + diff --git a/AV_Data_Capture.py b/AV_Data_Capture.py index 547595e..1994529 100755 --- a/AV_Data_Capture.py +++ b/AV_Data_Capture.py @@ -1,162 +1,416 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -import glob -import os -import time -import re -from ADC_function import * -from core import * -import json -import shutil -from configparser import ConfigParser -import argparse - - -def UpdateCheck(version): - if UpdateCheckSwitch() == '1': - html2 = get_html('https://raw.githubusercontent.com/yoshiko2/AV_Data_Capture/master/update_check.json') - html = json.loads(str(html2)) - - if not version == html['version']: - print('[*] * New update ' + html['version'] + ' *') - print('[*] ↓ Download ↓') - print('[*] ' + html['download']) - print('[*]======================================================') - else: - print('[+]Update Check disabled!') - -def argparse_get_file(): - parser = argparse.ArgumentParser() - parser.add_argument("file", default='',nargs='?', help="Write the file path on here") - args = parser.parse_args() - if args.file == '': - return '' - else: - return args.file - -def movie_lists(escape_folder): - escape_folder = re.split('[,,]', escape_folder) - total = [] - file_type = ['.mp4', '.avi', '.rmvb', '.wmv', '.mov', '.mkv', '.flv', '.ts', '.webm', '.MP4', '.AVI', '.RMVB', '.WMV','.MOV', '.MKV', '.FLV', '.TS', '.WEBM', ] - file_root = os.getcwd() - for root, dirs, files in os.walk(file_root): - flag_escape = 0 - for folder in escape_folder: - if folder in root: - flag_escape = 1 - break - if flag_escape == 1: - continue - for f in files: - if os.path.splitext(f)[1] in file_type: - path = os.path.join(root, f) - path = path.replace(file_root, '.') - total.append(path) - return total - - -def CreatFailedFolder(failed_folder): - if not os.path.exists(failed_folder + '/'): # 新建failed文件夹 - try: - os.makedirs(failed_folder + '/') - except: - print("[-]failed!can not be make folder 'failed'\n[-](Please run as Administrator)") 
- os._exit(0) - - -def CEF(path): - try: - files = os.listdir(path) # 获取路径下的子文件(夹)列表 - for file in files: - os.removedirs(path + '/' + file) # 删除这个空文件夹 - print('[+]Deleting empty folder', path + '/' + file) - except: - a = '' - - -def getNumber(filepath,absolute_path = False): - if absolute_path == True: - filepath=filepath.replace('\\','/') - file_number = str(re.findall(r'(.+?)\.', str(re.search('([^<>/\\\\|:""\\*\\?]+)\\.\\w+$', filepath).group()))).strip("['']").replace('_', '-') - return file_number - if '-' in filepath or '_' in filepath: # 普通提取番号 主要处理包含减号-和_的番号 - filepath = filepath.replace("_", "-") - filepath.strip('22-sht.me').strip('-HD').strip('-hd') - filename = str(re.sub("\[\d{4}-\d{1,2}-\d{1,2}\] - ", "", filepath)) # 去除文件名中时间 - if 'FC2' or 'fc2' in filename: - filename = filename.replace('-PPV', '').replace('PPV-', '').replace('FC2PPV-','FC2-').replace('FC2PPV_','FC2-') - file_number = re.search(r'\w+-\w+', filename, re.A).group() - return file_number - else: # 提取不含减号-的番号,FANZA CID - try: - return str(re.findall(r'(.+?)\.', str(re.search('([^<>/\\\\|:""\\*\\?]+)\\.\\w+$', filepath).group()))).strip("['']").replace('_', '-') - except: - return re.search(r'(.+?)\.', filepath)[0] - - -if __name__ == '__main__': - version = '2.8.2' - config_file = 'config.ini' - config = ConfigParser() - config.read(config_file, encoding='UTF-8') - success_folder = config['common']['success_output_folder'] - failed_folder = config['common']['failed_output_folder'] # 失败输出目录 - escape_folder = config['escape']['folders'] # 多级目录刮削需要排除的目录 - print('[*]================== AV Data Capture ===================') - print('[*] Version ' + version) - print('[*]======================================================') - - UpdateCheck(version) - CreatFailedFolder(failed_folder) - os.chdir(os.getcwd()) - movie_list = movie_lists(escape_folder) - - #========== 野鸡番号拖动 ========== - number_argparse=argparse_get_file() - if not number_argparse == '': - print("[!]Making Data for [" + number_argparse + "], the number is [" + getNumber(number_argparse,absolute_path = True) + "]") - core_main(number_argparse, getNumber(number_argparse,absolute_path = True)) - print("[*]======================================================") - CEF(success_folder) - CEF(failed_folder) - print("[+]All finished!!!") - input("[+][+]Press enter key exit, you can check the error messge before you exit.") - os._exit(0) - # ========== 野鸡番号拖动 ========== - - count = 0 - count_all = str(len(movie_list)) - print('[+]Find', count_all, 'movies') - if config['common']['soft_link'] == '1': - print('[!] --- Soft link mode is ENABLE! ----') - for i in movie_list: # 遍历电影列表 交给core处理 - count = count + 1 - percentage = str(count / int(count_all) * 100)[:4] + '%' - print('[!] 
- ' + percentage + ' [' + str(count) + '/' + count_all + '] -') - # print("[!]Making Data for [" + i + "], the number is [" + getNumber(i) + "]") - # core_main(i, getNumber(i)) - # print("[*]======================================================") - try: - print("[!]Making Data for [" + i + "], the number is [" + getNumber(i) + "]") - core_main(i, getNumber(i)) - print("[*]======================================================") - except: # 番号提取异常 - print('[-]' + i + ' Cannot catch the number :') - if config['common']['soft_link'] == '1': - print('[-]Link', i, 'to failed folder') - os.symlink(i, str(os.getcwd()) + '/' + failed_folder + '/') - else: - try: - print('[-]Move ' + i + ' to failed folder') - shutil.move(i, str(os.getcwd()) + '/' + failed_folder + '/') - except FileExistsError: - print('[!]File exists in failed!') - except: - print('[+]skip') - continue - - CEF(success_folder) - CEF(failed_folder) - print("[+]All finished!!!") - input("[+][+]Press enter key exit, you can check the error messge before you exit.") +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +import glob +import os +import time +import fuckit +from tenacity import retry, stop_after_delay, wait_fixed +import json +import shutil +import itertools +import argparse +from pathlib import Path + +from core import * +from ConfigApp import ConfigApp +from PathNameProcessor import PathNameProcessor + +# TODO 封装聚合解耦:CORE +# TODO (学习)统一依赖管理工具 +# TODO 不同媒体服务器尽量兼容统一一种元数据 如nfo 海报等(emby,jellyfin,plex) +# TODO 字幕整理功能 文件夹中读取所有字幕 并提番号放入对应缓存文件夹中TEMP + +config = ConfigApp() + + +def safe_list_get(list_in, idx, default=None): + """ + 数组安全取值 + :param list_in: + :param idx: + :param default: + :return: + """ + try: + return list_in[idx] + except IndexError: + return default + + +def UpdateCheck(version): + if UpdateCheckSwitch() == '1': + html2 = get_html('https://raw.githubusercontent.com/yoshiko2/AV_Data_Capture/master/update_check.json') + html = json.loads(str(html2)) + + if not version == html['version']: + print('[*] * New update ' + html['version'] + ' *') + print('[*] ↓ Download ↓') + print('[*] ' + html['download']) + print('[*]======================================================') + else: + print('[+]Update Check disabled!') + + +def argparse_get_file(): + parser = argparse.ArgumentParser() + parser.add_argument("file", default='', nargs='?', help="Write the file path on here") + args = parser.parse_args() + if args.file == '': + return '' + else: + return args.file + + +def movie_lists(escape_folders): + escape_folders = re.split('[,,]', escape_folders) + total = [] + + for root, dirs, files in os.walk(config.search_folder): + if root in escape_folders: + continue + for file in files: + if re.search(PathNameProcessor.pattern_of_file_name_suffixes, file, re.IGNORECASE): + path = os.path.join(root, file) + total.append(path) + return total + + +# def CEF(path): +# try: +# files = os.listdir(path) # 获取路径下的子文件(夹)列表 +# for file in files: +# os.removedirs(path + '/' + file) # 删除这个空文件夹 +# print('[+]Deleting empty folder', path + '/' + file) +# except: +# a = '' +# + + +def get_numbers(paths): + """提取对应路径的番号+集数""" + + def get_number(filepath, absolute_path=False): + """ + 获取番号,集数 + :param filepath: + :param absolute_path: + :return: + """ + name = filepath.upper() # 转大写 + if absolute_path: + name = name.replace('\\', '/') + # 移除干扰字段 + name = PathNameProcessor.remove_distractions(name) + # 抽取 文件路径中可能存在的尾部集数,和抽取尾部集数的后的文件路径 + suffix_episode, name = PathNameProcessor.extract_suffix_episode(name) + # 抽取 文件路径中可能存在的 番号后跟随的集数 和 处理后番号 + 
episode_behind_code, code_number = PathNameProcessor.extract_code(name) + # 无番号 则设置空字符 + code_number = code_number if code_number else '' + # 优先取尾部集数,无则取番号后的集数(几率低),都无则为空字符 + episode = suffix_episode if suffix_episode else episode_behind_code if episode_behind_code else '' + + return code_number, episode + + maps = {} + for path in paths: + number, episode = get_number(path) + maps[path] = (number, episode) + + return maps + + +def create_folder(paths): + for path_to_make in paths: + if path_to_make: + try: + os.makedirs(path_to_make) + except FileExistsError as e: + # name = f'{folder=}'.split('=')[0].split('.')[-1] + print(path_to_make + " 已经存在") + pass + except Exception as exception: + print('! 创建文件夹 ' + path_to_make + ' 失败,文件夹路径错误或权限不够') + raise exception + else: + raise Exception('!创建的文件夹路径为空,请确认') + + +if __name__ == '__main__': + version = '2.8.2' + + print('[*]================== AV Data Capture ===================') + print('[*] Version ' + version) + print('[*]======================================================') + + # UpdateCheck(version) + + CreatFailedFolder(config.failed_folder) + os.chdir(os.getcwd()) + + # 创建文件夹 + create_folder([config.failed_folder, config.search_folder, config.temp_folder]) + + # temp 文件夹中infos放 番号json信息,pics中放图片信息 + path_infos = config.temp_folder + '/infos' + path_pics = config.temp_folder + '/pics' + + create_folder([path_infos, path_pics]) + + # 遍历搜索目录下所有视频的路径 + movie_list = movie_lists(config.escape_folder) + + # 以下是从文本中提取测试的数据 + # f = open('TestPathNFO.txt', 'r') + # f = open('TestPathSpecial.txt', 'r') + # movie_list = [line[:-1] for line in f.readlines()] + # f.close() + + # 获取 番号,集数,路径 的字典->list + code_ep_paths = [[codeEposode[0], codeEposode[1], path] for path, codeEposode in get_numbers(movie_list).items()] + [print(i) for i in code_ep_paths] + # 按番号分组片子列表(重点),用于寻找相同番号的片子 + ''' + 这里利用pandas分组 "https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html" + + ''' + # # 设置打印时显示所有列 + # pd.set_option('display.max_columns', None) + # # 显示所有行 + # pd.set_option('display.max_rows', None) + # # 设置value的显示长度为100,默认为50 + # pd.set_option('max_colwidth', 30) + # # 创建框架 + # df = pd.DataFrame(code_ep_paths, columns=('code', 'ep', 'path')) + # # 以番号分组 + # groupedCode_code_ep_paths = df.groupby(['code']) + # # print(df.groupby(['code', 'ep']).describe().unstack()) + # grouped_code_ep = df.groupby(['code', 'ep'])['path'] + # + sorted_code_list = sorted(code_ep_paths, key=lambda code_ep_path: code_ep_path[0]) + group_code_list = itertools.groupby(sorted_code_list, key=lambda code_ep_path: code_ep_path[0]) + + + def group_code_list_to_dict(group_code_list): + data_dict = {} + for code, code_ep_path_group in group_code_list: + code_ep_path_list = list(code_ep_path_group) + eps_of_code = {} + group_ep_list = itertools.groupby(code_ep_path_list, key=lambda code_ep_path: code_ep_path[1]) + for ep, group_ep_group in group_ep_list: + group_ep_list = list(group_ep_group) + eps_of_code[ep] = [code_ep_path[2] for code_ep_path in group_ep_list] + data_dict[code] = eps_of_code + + return data_dict + + + def print_same_code_ep_path(data_dict_in): + for code_in in data_dict_in: + ep_path_list = data_dict_in[code_in] + if len(ep_path_list) > 1: + print('--' * 60) + print("|" + (code_in if code_in else 'unknown') + ":") + + # group_ep_list = itertools.groupby(code_ep_path_list.items(), key=lambda code_ep_path: code_ep_path[0]) + for ep in ep_path_list: + path_list = ep_path_list[ep] + print('--' * 12) + ep = ep if ep else ' ' + if len(path_list) == 1: + print('| 集数:' 
+ ep + ' 文件: ' + path_list[0]) + else: + print('| 集数:' + ep + ' 文件: ') + for path in path_list: + print('| ' + path) + + else: + pass + + + # 分好组的数据 {code:{ep:[path]}} + data_dict_groupby_code_ep = group_code_list_to_dict(group_code_list) + + print('--' * 100) + print("找到影片数量:" + str(len(movie_list))) + print("合计番号数量:" + str(len(data_dict_groupby_code_ep)) + " (多个相同番号的影片只统计一个,不能识别的番号 都统一为'unknown')") + print('Warning:!!!! 以下为相同番号的电影明细') + print('◤' + '--' * 80) + print_same_code_ep_path(data_dict_groupby_code_ep) + print('◣' + '--' * 80) + + isContinue = input('任意键继续? N 退出 \n') + if isContinue.strip(' ') == "N": + exit(1) + + + # ========== 野鸡番号拖动 ========== + # number_argparse = argparse_get_file() + # if not number_argparse == '': + # print("[!]Making Data for [" + number_argparse + "], the number is [" + getNumber(number_argparse, + # absolute_path=True) + "]") + # nfo = core_main(number_argparse, getNumber(number_argparse, absolute_path=True)) + # print("[*]======================================================") + # CEF(config.success_folder) + # CEF(config.failed_folder) + # print("[+]All finished!!!") + # input("[+][+]Press enter key exit, you can check the error messge before you exit.") + # os._exit(0) + # ========== 野鸡番号拖动 ========== + + def download_code_infos(code_list, is_read_cache=True): + """ + 遍历按番号分组的集合,刮取番号信息并缓存 + + :param is_read_cache: 是否读取缓存数据 + :param code_list: + :return: {code:nfo} + """ + count_all_grouped = len(code_list) + count = 0 + code_info_dict = {} + + for code in code_list: + count = count + 1 + percentage = str(count / int(count_all_grouped) * 100)[:4] + '%' + print('[!] - ' + percentage + ' [' + str(count) + '/' + str(count_all_grouped) + '] -') + try: + print("[!]搜刮数据 [" + code + "]") + if code: + # 创建番号的文件夹 + file_path = path_infos + '/' + code + '.json' + nfo = {} + # 读取缓存信息,如果没有则联网搜刮 + + path = Path(file_path) + if is_read_cache and (path.exists() and path.is_file() and path.stat().st_size > 0): + print('找到缓存信息') + with open(file_path) as fp: + nfo = json.load(fp) + else: + + # 核心功能 - 联网抓取信息字典 + print('联网搜刮') + nfo = core_main(code) + print('正在写入', end='') + + # 把缓存信息写入缓存文件夹中,有时会设备占用而失败,重试即可 + @retry(stop=stop_after_delay(3), wait=wait_fixed(2)) + def read_file(): + with open(file_path, 'w') as fp: + json.dump(nfo, fp) + + read_file() + print('完成!') + # 将番号信息放入字典 + code_info_dict[code] = nfo + print("[*]======================================================") + + except Exception as e: # 番号的信息获取失败 + code_info_dict[code] = '' + print("找不到信息:" + code + ',Reason:' + str(e)) + + # if config.soft_link: + # print('[-]Link', file_path_name, 'to failed folder') + # os.symlink(file_path_name, config.failed_folder + '/') + # else: + # try: + # print('[-]Move ' + file_path_name + ' to failed folder:' + config.failed_folder) + # shutil.move(file_path_name, config.failed_folder + '/') + # except FileExistsError: + # print('[!]File exists in failed!') + # except: + # print('[+]skip') + continue + return code_info_dict + + + print('----------------------------------') + code_infos = download_code_infos(data_dict_groupby_code_ep) + print("----未找到番号数据的番号----") + print([print(code) for code in code_infos if code_infos[code] == '']) + print("-------------------------") + + + def download_images_of_nfos(code_info_dict): + """ + 遍历番号信息,下载番号电影的海报,图片 + :param code_info_dict: + :return: 无图片的信息的番号 + """ + + code_list_empty_image = [] + for code in code_info_dict: + nfo = code_info_dict[code] + if len(nfo.keys()) == 0: + code_list_empty_image.append(code) + continue + + 
code_pics_folder_to_save = path_pics + '/' + code + # 1 创建 番号文件夹 + os.makedirs(code_pics_folder_to_save, exist_ok=True) + # 下载缩略图 + if nfo['imagecut'] == 3: # 3 是缩略图 + path = Path(code_pics_folder_to_save + '/' + 'thumb.png') + if path.exists() and path.is_file() and path.stat().st_size > 0: + print(code + ':缩略图已有缓存') + else: + print(code + ':缩略图下载中...') + download_file(nfo['cover_small'], code_pics_folder_to_save, 'thumb.png') + print(code + ':缩略图下载完成') + # 下载海报 + path = Path(code_pics_folder_to_save + '/' + 'poster.png') + if path.exists() and path.is_file() and path.stat().st_size > 0: + print(code + ':海报已有缓存') + else: + print(code + ':海报下载中...') + download_file(nfo['cover'], code_pics_folder_to_save, 'poster.png') + print(code + ':海报下载完成') + return code_list_empty_image + + + + code_list_empty = download_images_of_nfos(code_infos) + print("----未找到集数的番号----") + print([print(code) for code in code_list_empty]) + print("------搜刮未找到集数的番号------") + code_infos_of_no_ep = download_code_infos(code_list_empty, is_read_cache=False) + print("----还是未找到番号数据的番号----") + print([print(code) for code in code_infos_of_no_ep if code_infos_of_no_ep[code] == '']) + print("----------------------") + # 开始操作 + # # 2 创建缩略图海报 + # if nfo['imagecut'] == 3: # 3 是缩略图 + # download_cover_file(nfo['cover_small'], code, code_pics_folder_to_save) + # # 3 创建图 + # download_image(nfo['cover'], code, code_pics_folder_to_save) + # # 4 剪裁 + # crop_image(nfo['imagecut'], code, code_pics_folder_to_save) + # # 5 背景图 + # copy_images_to_background_image(code, code_pics_folder_to_save) + # 6 创建 mame.nfo(不需要,需要时从infos中josn文件转为nfo文件) + # make_nfo_file(nfo, code, temp_path_to_save) + # 相同番号处理:按集数添加-CD[X];视频格式 and 大小 分; + # TODO 方式1 刮削:添加nfo,封面,内容截图等 + # 6 创建 mame.nfo(不需要,需要时从infos中josn文件转为nfo文件) + make_nfo_file(nfo, code, temp_path_to_save) + # TODO 方式2 整理:按规则移动影片,字幕 到 演员,发行商,有无🐎 等 + + # if config.program_mode == '1': + # if multi_part == 1: + # number += part # 这时number会被附加上CD1后缀 + # smallCoverCheck(path, number, imagecut, json_data['cover_small'], c_word, option, filepath, config.failed_folder) # 检查小封面 + # imageDownload(option, json_data['cover'], number, c_word, path, multi_part, filepath, config.failed_folder) # creatFoder会返回番号路径 + # cutImage(option, imagecut, path, number, c_word) # 裁剪图 + # copyRenameJpgToBackdrop(option, path, number, c_word) + # PrintFiles(option, path, c_word, json_data['naming_rule'], part, cn_sub, json_data, filepath, config.failed_folder, tag) # 打印文件 .nfo + # pasteFileToFolder(filepath, path, number, c_word) # 移动文件 + # # =======================================================================整理模式 + # elif config.program_mode == '2': + # pasteFileToFolder_mode2(filepath, path, multi_part, number, part, c_word) # 移动文件 + + # CEF(config.success_folder) + # CEF(config.failed_folder) + print("[+]All finished!!!") + input("[+][+]Press enter key exit, you can check the error message before you exit.") diff --git a/ConfigApp.py b/ConfigApp.py new file mode 100755 index 0000000..0d8c835 --- /dev/null +++ b/ConfigApp.py @@ -0,0 +1,28 @@ +from configparser import ConfigParser + +from MediaServer import MediaServer + + +class ConfigApp: + def __init__(self): + config_file = 'config.ini' + config = ConfigParser() + config.read(config_file, encoding='UTF-8') + self.success_folder = config['common']['success_output_folder'] + self.failed_folder = config['common']['failed_output_folder'] # 失败输出目录 + self.escape_folder = config['escape']['folders'] # 多级目录刮削需要排除的目录 + self.search_folder = config['common']['search_folder'] # 搜索路径 
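+        # NOTE: ConfigApp reads several keys that the old auto-generated
+        # config.ini never contained; a minimal sketch of the assumed
+        # [common] section (key names are taken from the reads in this
+        # class, the values are illustrative only — configparser returns
+        # every value as a string):
+        #
+        #   [common]
+        #   search_folder = ./
+        #   temp_folder = TEMP
+        #   soft_link = 0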
+ self.temp_folder = config['common']['temp_folder'] # 临时资源路径 + self.soft_link = (config['common']['soft_link'] == 1) + # self.escape_literals = (config['escape']['literals'] == 1) + self.naming_rule = config['Name_Rule']['naming_rule'] + self.location_rule = config['Name_Rule']['location_rule'] + + self.proxy = config['proxy']['proxy'] + self.timeout = float(config['proxy']['timeout']) + self.retry = int(config['proxy']['retry']) + self.media_server = MediaServer[config['media']['media_warehouse']] + self.update_check = config['update']['update_check'] + self.debug_mode = config['debug_mode']['switch'] + + diff --git a/LICENSE b/LICENSE old mode 100644 new mode 100755 diff --git a/LearningNote/GroupbyDemo.py b/LearningNote/GroupbyDemo.py new file mode 100644 index 0000000..558a415 --- /dev/null +++ b/LearningNote/GroupbyDemo.py @@ -0,0 +1,19 @@ +import pandas as pd +import numpy as np + +df = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar', + 'foo', 'bar', 'foo', 'foo'], + 'B': ['one', 'one', 'two', 'three', + 'two', 'two', 'one', 'three'], + 'C': np.random.randn(8), + 'D': np.random.randn(8)}) + +print(df) +groupedA = df.groupby('A').describe() +groupedAB = df.groupby(['A', 'B'])['C'] +print('---'*18) +for a, b in groupedAB: + print('--'*18) + print(a) + print('-' * 18) + print(b) diff --git a/LearningNote/PandasDemo.py b/LearningNote/PandasDemo.py new file mode 100644 index 0000000..0ed8aad --- /dev/null +++ b/LearningNote/PandasDemo.py @@ -0,0 +1,38 @@ +import pandas as pd +import numpy as np + +''' +python数据处理三剑客之一pandas +https://pandas.pydata.org/pandas-docs/stable/user_guide +https://www.pypandas.cn/docs/getting_started/10min.html +''' + +dates = pd.date_range('20130101', periods=6) +df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD')) +print(dates) +print(df) + +df2 = pd.DataFrame({'A': 1., + 'B': pd.Timestamp('20130102'), + 'C': pd.Series(1, index=list(range(4)), dtype='float32'), + 'D': np.array([3] * 4, dtype='int32'), + 'E': pd.Categorical(["test", "train", "test", "train"]), + 'F': 'foo'}) +print(df2) +print(df2.dtypes) +print(df.head()) +print(df.tail(5)) +print(df.index) +print(df.columns) +df.describe() # 统计数据摘要 +df.T # index columns互转 +df.sort_index(axis=1, ascending=False) # 排序,axis=1 是columns,axis=1 是index +df.sort_values(by='B') # 按值排序 按B列中的值排序 + +# 切行 +df.A +df['A'] +# 切行 +df['20130102':'20130104'] +df[0:3] + diff --git a/MediaServer.py b/MediaServer.py new file mode 100644 index 0000000..52e1530 --- /dev/null +++ b/MediaServer.py @@ -0,0 +1,28 @@ +from enum import Enum, auto + + +class MediaServer(Enum): + EMBY = auto() + PLEX = auto() + KODI = auto() + + # media = EMBY + # + # def __init__(self, arg): + # self = [e for e in MediaServer if arg.upper() == self.name] + + def poster_name(self, name): + if self == MediaServer.EMBY: # 保存[name].png + return name + '.png' + elif self == MediaServer.KODI: # 保存[name]-poster.jpg + return name + '-poster.jpg' + elif self == MediaServer.PLEX: # 保存 poster.jpg + return 'poster.jpg' + + def image_name(self, name): + if self == MediaServer.EMBY: # name.jpg + return name + '.jpg' + elif self == MediaServer.KODI: # [name]-fanart.jpg + return name + '-fanart.jpg' + elif self == MediaServer.PLEX: # fanart.jpg + return 'fanart.jpg' diff --git a/Metadate.py b/Metadate.py new file mode 100644 index 0000000..9acf3c5 --- /dev/null +++ b/Metadate.py @@ -0,0 +1,3 @@ +from addict import Dict + +# class Metadata: diff --git a/PathNameProcessor.py b/PathNameProcessor.py new file mode 100644 index 0000000..fd87842 --- 
/dev/null +++ b/PathNameProcessor.py @@ -0,0 +1,115 @@ +import re + +import fuckit + + +class PathNameProcessor: + # 类变量 + pattern_of_file_name_suffixes = r'.(mov|mp4|avi|rmvb|wmv|mov|mkv|flv|ts|m2ts)$' + + # def __init__(self): + + @staticmethod + def remove_distractions(origin_name): + """移除干扰项""" + # 移除文件类型后缀 + origin_name = re.sub(PathNameProcessor.pattern_of_file_name_suffixes, '', origin_name, 0, re.IGNORECASE) + + # 处理包含减号-和_的番号'/-070409_621' + origin_name = re.sub(r'[-_~*# ]', "-", origin_name, 0) + + origin_name = re.sub(r'(Carib)(bean)?', '-', origin_name, 0, re.IGNORECASE) + origin_name = re.sub(r'(1pondo)', '-', origin_name, 0, re.IGNORECASE) + origin_name = re.sub(r'(tokyo)[-. ]?(hot)', '-', origin_name, 0, re.IGNORECASE) + origin_name = re.sub(r'Uncensored', '-', origin_name, 0, re.IGNORECASE) + origin_name = re.sub(r'JAV', '-', origin_name, 0, re.IGNORECASE) + # 移除干扰字段 + origin_name = origin_name.replace('22-sht.me', '-') + + # 去除文件名中时间 1970-2099年 月 日 + pattern_of_date = r'(?:-)(19[789]\d|20\d{2})(-?(0\d|1[012])-?(0[1-9]|[12]\d|3[01])?)?[-.]' + # 移除字母开头 清晰度相关度 字符 + pattern_of_resolution_alphas = r'(? NTTR-037 , SIVR-00008 -> SIVR-008 ,但是heyzo除外 + if "heyzo" not in name.lower(): + searched = re.search(r'([a-zA-Z]{2,})-(?:0*)(\d{3,})', name) + if searched: + name = '-'.join(searched.groups()) + + return episode, name + + @staticmethod + def extract_episode_behind_code(origin_name, code): + episode = None + + with fuckit: + # 零宽断言获取尾部字幕 剧集数 abc123 + result_dict = re.search(rf'(?<={code})-?((?P([A-Z](?![A-Z])))|(?P\d(?!\d)))', origin_name, + re.I).groupdict() + episode = result_dict['alpha'] or result_dict['num'] + return episode + + +def safe_list_get(list_in, idx, default): + try: + return list_in[idx] + except IndexError: + return default diff --git a/Pipfile b/Pipfile new file mode 100644 index 0000000..cca1b93 --- /dev/null +++ b/Pipfile @@ -0,0 +1,19 @@ +[[source]] +name = "pypi" +url = "https://pypi.org/simple" +verify_ssl = true + +[dev-packages] + +[packages] +bs4 = "*" +tenacity = "*" +fuckit = "*" +requests = "*" +image = "*" +lazyxml = {editable = true,git = "https://github.com/waynedyck/lazyxml.git",ref = "python-3-conversion_wd1"} +lxml = "*" +pyquery = "*" + +[requires] +python_version = "3.8" diff --git a/Pipfile.lock b/Pipfile.lock new file mode 100644 index 0000000..1ca43ea --- /dev/null +++ b/Pipfile.lock @@ -0,0 +1,246 @@ +{ + "_meta": { + "hash": { + "sha256": "15bf3c6af3ec315358a0217481a13285f95fc742bb5db8a1f934e0d1c3d7d5e2" + }, + "pipfile-spec": 6, + "requires": { + "python_version": "3.8" + }, + "sources": [ + { + "name": "pypi", + "url": "https://pypi.org/simple", + "verify_ssl": true + } + ] + }, + "default": { + "asgiref": { + "hashes": [ + "sha256:5ee950735509d04eb673bd7f7120f8fa1c9e2df495394992c73234d526907e17", + "sha256:7162a3cb30ab0609f1a4c95938fd73e8604f63bdba516a7f7d64b83ff09478f0" + ], + "markers": "python_version >= '3.5'", + "version": "==3.3.1" + }, + "beautifulsoup4": { + "hashes": [ + "sha256:4c98143716ef1cb40bf7f39a8e3eec8f8b009509e74904ba3a7b315431577e35", + "sha256:84729e322ad1d5b4d25f805bfa05b902dd96450f43842c4e99067d5e1369eb25", + "sha256:fff47e031e34ec82bf17e00da8f592fe7de69aeea38be00523c04623c04fb666" + ], + "version": "==4.9.3" + }, + "bs4": { + "hashes": [ + "sha256:36ecea1fd7cc5c0c6e4a1ff075df26d50da647b75376626cc186e2212886dd3a" + ], + "index": "pypi", + "version": "==0.0.1" + }, + "certifi": { + "hashes": [ + "sha256:1a4995114262bffbc2413b159f2a1a480c969de6e6eb13ee966d470af86af59c", + 
"sha256:719a74fb9e33b9bd44cc7f3a8d94bc35e4049deebe19ba7d8e108280cfd59830" + ], + "version": "==2020.12.5" + }, + "chardet": { + "hashes": [ + "sha256:0d6f53a15db4120f2b08c94f11e7d93d2c911ee118b6b30a04ec3ee8310179fa", + "sha256:f864054d66fd9118f2e67044ac8981a54775ec5b67aed0441892edb553d21da5" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", + "version": "==4.0.0" + }, + "cssselect": { + "hashes": [ + "sha256:f612ee47b749c877ebae5bb77035d8f4202c6ad0f0fc1271b3c18ad6c4468ecf", + "sha256:f95f8dedd925fd8f54edb3d2dfb44c190d9d18512377d3c1e2388d16126879bc" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==1.1.0" + }, + "django": { + "hashes": [ + "sha256:2d78425ba74c7a1a74b196058b261b9733a8570782f4e2828974777ccca7edf7", + "sha256:efa2ab96b33b20c2182db93147a0c3cd7769d418926f9e9f140a60dca7c64ca9" + ], + "markers": "python_version >= '3.6'", + "version": "==3.1.5" + }, + "fuckit": { + "hashes": [ + "sha256:059488e6aa2053da9db5eb5101e2498f608314da5118bf2385acb864568ccc25" + ], + "index": "pypi", + "version": "==4.8.1" + }, + "idna": { + "hashes": [ + "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6", + "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==2.10" + }, + "image": { + "hashes": [ + "sha256:baa2e09178277daa50f22fd6d1d51ec78f19c12688921cb9ab5808743f097126" + ], + "index": "pypi", + "version": "==1.5.33" + }, + "lazyxml": { + "editable": true, + "git": "https://github.com/waynedyck/lazyxml.git", + "ref": "f42ea4a4febf4c1e120b05d6ca9cef42556a75d5" + }, + "lxml": { + "hashes": [ + "sha256:0448576c148c129594d890265b1a83b9cd76fd1f0a6a04620753d9a6bcfd0a4d", + "sha256:127f76864468d6630e1b453d3ffbbd04b024c674f55cf0a30dc2595137892d37", + "sha256:1471cee35eba321827d7d53d104e7b8c593ea3ad376aa2df89533ce8e1b24a01", + "sha256:2363c35637d2d9d6f26f60a208819e7eafc4305ce39dc1d5005eccc4593331c2", + "sha256:2e5cc908fe43fe1aa299e58046ad66981131a66aea3129aac7770c37f590a644", + "sha256:2e6fd1b8acd005bd71e6c94f30c055594bbd0aa02ef51a22bbfa961ab63b2d75", + "sha256:366cb750140f221523fa062d641393092813b81e15d0e25d9f7c6025f910ee80", + "sha256:42ebca24ba2a21065fb546f3e6bd0c58c3fe9ac298f3a320147029a4850f51a2", + "sha256:4e751e77006da34643ab782e4a5cc21ea7b755551db202bc4d3a423b307db780", + "sha256:4fb85c447e288df535b17ebdebf0ec1cf3a3f1a8eba7e79169f4f37af43c6b98", + "sha256:50c348995b47b5a4e330362cf39fc503b4a43b14a91c34c83b955e1805c8e308", + "sha256:535332fe9d00c3cd455bd3dd7d4bacab86e2d564bdf7606079160fa6251caacf", + "sha256:535f067002b0fd1a4e5296a8f1bf88193080ff992a195e66964ef2a6cfec5388", + "sha256:5be4a2e212bb6aa045e37f7d48e3e1e4b6fd259882ed5a00786f82e8c37ce77d", + "sha256:60a20bfc3bd234d54d49c388950195d23a5583d4108e1a1d47c9eef8d8c042b3", + "sha256:648914abafe67f11be7d93c1a546068f8eff3c5fa938e1f94509e4a5d682b2d8", + "sha256:681d75e1a38a69f1e64ab82fe4b1ed3fd758717bed735fb9aeaa124143f051af", + "sha256:68a5d77e440df94011214b7db907ec8f19e439507a70c958f750c18d88f995d2", + "sha256:69a63f83e88138ab7642d8f61418cf3180a4d8cd13995df87725cb8b893e950e", + "sha256:6e4183800f16f3679076dfa8abf2db3083919d7e30764a069fb66b2b9eff9939", + "sha256:6fd8d5903c2e53f49e99359b063df27fdf7acb89a52b6a12494208bf61345a03", + "sha256:791394449e98243839fa822a637177dd42a95f4883ad3dec2a0ce6ac99fb0a9d", + "sha256:7a7669ff50f41225ca5d6ee0a1ec8413f3a0d8aa2b109f86d540887b7ec0d72a", + 
"sha256:7e9eac1e526386df7c70ef253b792a0a12dd86d833b1d329e038c7a235dfceb5", + "sha256:7ee8af0b9f7de635c61cdd5b8534b76c52cd03536f29f51151b377f76e214a1a", + "sha256:8246f30ca34dc712ab07e51dc34fea883c00b7ccb0e614651e49da2c49a30711", + "sha256:8c88b599e226994ad4db29d93bc149aa1aff3dc3a4355dd5757569ba78632bdf", + "sha256:923963e989ffbceaa210ac37afc9b906acebe945d2723e9679b643513837b089", + "sha256:94d55bd03d8671686e3f012577d9caa5421a07286dd351dfef64791cf7c6c505", + "sha256:97db258793d193c7b62d4e2586c6ed98d51086e93f9a3af2b2034af01450a74b", + "sha256:a9d6bc8642e2c67db33f1247a77c53476f3a166e09067c0474facb045756087f", + "sha256:cd11c7e8d21af997ee8079037fff88f16fda188a9776eb4b81c7e4c9c0a7d7fc", + "sha256:d8d3d4713f0c28bdc6c806a278d998546e8efc3498949e3ace6e117462ac0a5e", + "sha256:e0bfe9bb028974a481410432dbe1b182e8191d5d40382e5b8ff39cdd2e5c5931", + "sha256:f4822c0660c3754f1a41a655e37cb4dbbc9be3d35b125a37fab6f82d47674ebc", + "sha256:f83d281bb2a6217cd806f4cf0ddded436790e66f393e124dfe9731f6b3fb9afe", + "sha256:fc37870d6716b137e80d19241d0e2cff7a7643b925dfa49b4c8ebd1295eb506e" + ], + "index": "pypi", + "version": "==4.6.2" + }, + "pillow": { + "hashes": [ + "sha256:165c88bc9d8dba670110c689e3cc5c71dbe4bfb984ffa7cbebf1fac9554071d6", + "sha256:1d208e670abfeb41b6143537a681299ef86e92d2a3dac299d3cd6830d5c7bded", + "sha256:22d070ca2e60c99929ef274cfced04294d2368193e935c5d6febfd8b601bf865", + "sha256:2353834b2c49b95e1313fb34edf18fca4d57446675d05298bb694bca4b194174", + "sha256:39725acf2d2e9c17356e6835dccebe7a697db55f25a09207e38b835d5e1bc032", + "sha256:3de6b2ee4f78c6b3d89d184ade5d8fa68af0848f9b6b6da2b9ab7943ec46971a", + "sha256:47c0d93ee9c8b181f353dbead6530b26980fe4f5485aa18be8f1fd3c3cbc685e", + "sha256:5e2fe3bb2363b862671eba632537cd3a823847db4d98be95690b7e382f3d6378", + "sha256:604815c55fd92e735f9738f65dabf4edc3e79f88541c221d292faec1904a4b17", + "sha256:6c5275bd82711cd3dcd0af8ce0bb99113ae8911fc2952805f1d012de7d600a4c", + "sha256:731ca5aabe9085160cf68b2dbef95fc1991015bc0a3a6ea46a371ab88f3d0913", + "sha256:7612520e5e1a371d77e1d1ca3a3ee6227eef00d0a9cddb4ef7ecb0b7396eddf7", + "sha256:7916cbc94f1c6b1301ac04510d0881b9e9feb20ae34094d3615a8a7c3db0dcc0", + "sha256:81c3fa9a75d9f1afafdb916d5995633f319db09bd773cb56b8e39f1e98d90820", + "sha256:887668e792b7edbfb1d3c9d8b5d8c859269a0f0eba4dda562adb95500f60dbba", + "sha256:93a473b53cc6e0b3ce6bf51b1b95b7b1e7e6084be3a07e40f79b42e83503fbf2", + "sha256:96d4dc103d1a0fa6d47c6c55a47de5f5dafd5ef0114fa10c85a1fd8e0216284b", + "sha256:a3d3e086474ef12ef13d42e5f9b7bbf09d39cf6bd4940f982263d6954b13f6a9", + "sha256:b02a0b9f332086657852b1f7cb380f6a42403a6d9c42a4c34a561aa4530d5234", + "sha256:b09e10ec453de97f9a23a5aa5e30b334195e8d2ddd1ce76cc32e52ba63c8b31d", + "sha256:b6f00ad5ebe846cc91763b1d0c6d30a8042e02b2316e27b05de04fa6ec831ec5", + "sha256:bba80df38cfc17f490ec651c73bb37cd896bc2400cfba27d078c2135223c1206", + "sha256:c3d911614b008e8a576b8e5303e3db29224b455d3d66d1b2848ba6ca83f9ece9", + "sha256:ca20739e303254287138234485579b28cb0d524401f83d5129b5ff9d606cb0a8", + "sha256:cb192176b477d49b0a327b2a5a4979552b7a58cd42037034316b8018ac3ebb59", + "sha256:cdbbe7dff4a677fb555a54f9bc0450f2a21a93c5ba2b44e09e54fcb72d2bd13d", + "sha256:cf6e33d92b1526190a1de904df21663c46a456758c0424e4f947ae9aa6088bf7", + "sha256:d355502dce85ade85a2511b40b4c61a128902f246504f7de29bbeec1ae27933a", + "sha256:d673c4990acd016229a5c1c4ee8a9e6d8f481b27ade5fc3d95938697fa443ce0", + "sha256:dc577f4cfdda354db3ae37a572428a90ffdbe4e51eda7849bf442fb803f09c9b", + "sha256:dd9eef866c70d2cbbea1ae58134eaffda0d4bfea403025f4db6859724b18ab3d", + 
"sha256:f50e7a98b0453f39000619d845be8b06e611e56ee6e8186f7f60c3b1e2f0feae" + ], + "markers": "python_version >= '3.6'", + "version": "==8.1.0" + }, + "pyquery": { + "hashes": [ + "sha256:1fc33b7699455ed25c75282bc8f80ace1ac078b0dda5a933dacbd8b1c1f83963", + "sha256:a388eefb6bc4a55350de0316fbd97cda999ae669b6743ae5b99102ba54f5aa72" + ], + "index": "pypi", + "version": "==1.4.3" + }, + "pytz": { + "hashes": [ + "sha256:16962c5fb8db4a8f63a26646d8886e9d769b6c511543557bc84e9569fb9a9cb4", + "sha256:180befebb1927b16f6b57101720075a984c019ac16b1b7575673bea42c6c3da5" + ], + "version": "==2020.5" + }, + "requests": { + "hashes": [ + "sha256:27973dd4a904a4f13b263a19c866c13b92a39ed1c964655f025f3f8d3d75b804", + "sha256:c210084e36a42ae6b9219e00e48287def368a26d03a048ddad7bfee44f75871e" + ], + "index": "pypi", + "version": "==2.25.1" + }, + "six": { + "hashes": [ + "sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259", + "sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", + "version": "==1.15.0" + }, + "soupsieve": { + "hashes": [ + "sha256:4bb21a6ee4707bf43b61230e80740e71bfe56e55d1f1f50924b087bb2975c851", + "sha256:6dc52924dc0bc710a5d16794e6b3480b2c7c08b07729505feab2b2c16661ff6e" + ], + "markers": "python_version >= '3.0'", + "version": "==2.1" + }, + "sqlparse": { + "hashes": [ + "sha256:017cde379adbd6a1f15a61873f43e8274179378e95ef3fede90b5aa64d304ed0", + "sha256:0f91fd2e829c44362cbcfab3e9ae12e22badaa8a29ad5ff599f9ec109f0454e8" + ], + "markers": "python_version >= '3.5'", + "version": "==0.4.1" + }, + "tenacity": { + "hashes": [ + "sha256:baed357d9f35ec64264d8a4bbf004c35058fad8795c5b0d8a7dc77ecdcbb8f39", + "sha256:e14d191fb0a309b563904bbc336582efe2037de437e543b38da749769b544d7f" + ], + "index": "pypi", + "version": "==6.3.1" + }, + "urllib3": { + "hashes": [ + "sha256:19188f96923873c92ccb987120ec4acaa12f0461fa9ce5d3d0772bc965a39e08", + "sha256:d8ff90d979214d7b4f8ce956e80f4028fc6860e4431f731ea4a8c08f23f99473" + ], + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'", + "version": "==1.26.2" + } + }, + "develop": {} +} diff --git a/README.md b/README.md old mode 100644 new mode 100755 diff --git a/avsox.py b/SiteSource/avsox.py old mode 100644 new mode 100755 similarity index 96% rename from avsox.py rename to SiteSource/avsox.py index 67ee9bf..87ae401 --- a/avsox.py +++ b/SiteSource/avsox.py @@ -1,115 +1,116 @@ -import re -from lxml import etree -import json -from bs4 import BeautifulSoup -from ADC_function import * -# import sys -# import io -# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True) - -def getActorPhoto(htmlcode): #//*[@id="star_qdt"]/li/a/img - soup = BeautifulSoup(htmlcode, 'lxml') - a = soup.find_all(attrs={'class': 'avatar-box'}) - d = {} - for i in a: - l = i.img['src'] - t = i.span.get_text() - p2 = {t: l} - d.update(p2) - return d -def getTitle(a): - try: - html = etree.fromstring(a, etree.HTMLParser()) - result = str(html.xpath('/html/body/div[2]/h3/text()')).strip(" ['']") #[0] - return result.replace('/', '') - except: - return '' -def getActor(a): #//*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text() - soup = BeautifulSoup(a, 'lxml') - a = soup.find_all(attrs={'class': 'avatar-box'}) - d = [] - for i in a: - d.append(i.span.get_text()) - return d -def getStudio(a): - html = etree.fromstring(a, etree.HTMLParser()) # 
//table/tr[1]/td[1]/text() - result1 = str(html.xpath('//p[contains(text(),"制作商: ")]/following-sibling::p[1]/a/text()')).strip(" ['']").replace("', '",' ') - return result1 -def getRuntime(a): - html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() - result1 = str(html.xpath('//span[contains(text(),"长度:")]/../text()')).strip(" ['分钟']") - return result1 -def getLabel(a): - html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() - result1 = str(html.xpath('//p[contains(text(),"系列:")]/following-sibling::p[1]/a/text()')).strip(" ['']") - return result1 -def getNum(a): - html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() - result1 = str(html.xpath('//span[contains(text(),"识别码:")]/../span[2]/text()')).strip(" ['']") - return result1 -def getYear(release): - try: - result = str(re.search('\d{4}',release).group()) - return result - except: - return release -def getRelease(a): - html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() - result1 = str(html.xpath('//span[contains(text(),"发行时间:")]/../text()')).strip(" ['']") - return result1 -def getCover(htmlcode): - html = etree.fromstring(htmlcode, etree.HTMLParser()) - result = str(html.xpath('/html/body/div[2]/div[1]/div[1]/a/img/@src')).strip(" ['']") - return result -def getCover_small(htmlcode): - html = etree.fromstring(htmlcode, etree.HTMLParser()) - result = str(html.xpath('//*[@id="waterfall"]/div/a/div[1]/img/@src')).strip(" ['']") - return result -def getTag(a): # 获取演员 - soup = BeautifulSoup(a, 'lxml') - a = soup.find_all(attrs={'class': 'genre'}) - d = [] - for i in a: - d.append(i.get_text()) - return d - -def main(number): - a = get_html('https://avsox.host/cn/search/' + number) - html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() - result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']") - if result1 == '' or result1 == 'null' or result1 == 'None': - a = get_html('https://avsox.host/cn/search/' + number.replace('-', '_')) - print(a) - html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() - result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']") - if result1 == '' or result1 == 'null' or result1 == 'None': - a = get_html('https://avsox.host/cn/search/' + number.replace('_', '')) - print(a) - html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() - result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']") - web = get_html(result1) - soup = BeautifulSoup(web, 'lxml') - info = str(soup.find(attrs={'class': 'row movie'})) - dic = { - 'actor': getActor(web), - 'title': getTitle(web).strip(getNum(web)), - 'studio': getStudio(info), - 'outline': '',# - 'runtime': getRuntime(info), - 'director': '', # - 'release': getRelease(info), - 'number': getNum(info), - 'cover': getCover(web), - 'cover_small': getCover_small(a), - 'imagecut': 3, - 'tag': getTag(web), - 'label': getLabel(info), - 'year': getYear(getRelease(info)), # str(re.search('\d{4}',getRelease(a)).group()), - 'actor_photo': getActorPhoto(web), - 'website': result1, - 'source': 'avsox.py', - } - js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8') - return js - +import re +from lxml import etree +import json +from bs4 import BeautifulSoup +from ADC_function import * +# import sys +# import io +# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True) + +def getActorPhoto(htmlcode): 
#//*[@id="star_qdt"]/li/a/img + soup = BeautifulSoup(htmlcode, 'lxml') + a = soup.find_all(attrs={'class': 'avatar-box'}) + d = {} + for i in a: + l = i.img['src'] + t = i.span.get_text() + p2 = {t: l} + d.update(p2) + return d +def getTitle(a): + try: + html = etree.fromstring(a, etree.HTMLParser()) + result = str(html.xpath('/html/body/div[2]/h3/text()')).strip(" ['']") #[0] + return result.replace('/', '') + except: + return '' +def getActor(a): #//*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text() + soup = BeautifulSoup(a, 'lxml') + a = soup.find_all(attrs={'class': 'avatar-box'}) + d = [] + for i in a: + d.append(i.span.get_text()) + return d +def getStudio(a): + html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() + result1 = str(html.xpath('//p[contains(text(),"制作商: ")]/following-sibling::p[1]/a/text()')).strip(" ['']").replace("', '",' ') + return result1 +def getRuntime(a): + html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() + result1 = str(html.xpath('//span[contains(text(),"长度:")]/../text()')).strip(" ['分钟']") + return result1 +def getLabel(a): + html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() + result1 = str(html.xpath('//p[contains(text(),"系列:")]/following-sibling::p[1]/a/text()')).strip(" ['']") + return result1 +def getNum(a): + html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() + result1 = str(html.xpath('//span[contains(text(),"识别码:")]/../span[2]/text()')).strip(" ['']") + return result1 +def getYear(release): + try: + result = str(re.search('\d{4}',release).group()) + return result + except: + return release +def getRelease(a): + html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() + result1 = str(html.xpath('//span[contains(text(),"发行时间:")]/../text()')).strip(" ['']") + return result1 +def getCover(htmlcode): + html = etree.fromstring(htmlcode, etree.HTMLParser()) + result = str(html.xpath('/html/body/div[2]/div[1]/div[1]/a/img/@src')).strip(" ['']") + return result +def getCover_small(htmlcode): + html = etree.fromstring(htmlcode, etree.HTMLParser()) + result = str(html.xpath('//*[@id="waterfall"]/div/a/div[1]/img/@src')).strip(" ['']") + return result +def getTag(a): # 获取演员 + soup = BeautifulSoup(a, 'lxml') + a = soup.find_all(attrs={'class': 'genre'}) + d = [] + for i in a: + d.append(i.get_text()) + return d + +def main(number): + url = 'https://avsox.host/cn/search/' + number + a = get_html(url) + html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() + result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']") + if result1 == '' or result1 == 'null' or result1 == 'None': + a = get_html('https://avsox.host/cn/search/' + number.replace('-', '_')) + print(a) + html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() + result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']") + if result1 == '' or result1 == 'null' or result1 == 'None': + a = get_html('https://avsox.host/cn/search/' + number.replace('_', '')) + print(a) + html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() + result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']") + web = get_html(result1) + soup = BeautifulSoup(web, 'lxml') + info = str(soup.find(attrs={'class': 'row movie'})) + dic = { + 'actor': getActor(web), + 'title': getTitle(web).strip(getNum(web)), + 'studio': getStudio(info), + 'outline': '',# + 'runtime': getRuntime(info), + 
'director': '', # + 'release': getRelease(info), + 'number': getNum(info), + 'cover': getCover(web), + 'cover_small': getCover_small(a), + 'imagecut': 3, + 'tag': getTag(web), + 'label': getLabel(info), + 'year': getYear(getRelease(info)), # str(re.search('\d{4}',getRelease(a)).group()), + 'actor_photo': getActorPhoto(web), + 'website': result1, + 'source': 'avsox.py', + } + js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8') + return js + #print(main('012717_472')) \ No newline at end of file diff --git a/fanza.py b/SiteSource/fanza.py old mode 100644 new mode 100755 similarity index 97% rename from fanza.py rename to SiteSource/fanza.py index 87c8be0..72632dc --- a/fanza.py +++ b/SiteSource/fanza.py @@ -1,229 +1,229 @@ -#!/usr/bin/python3 -# -*- coding: utf-8 -*- -import json -import re - -from lxml import etree - -from ADC_function import * - -# import sys -# import io -# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True) - - -def getTitle(text): - html = etree.fromstring(text, etree.HTMLParser()) - result = html.xpath('//*[@id="title"]/text()')[0] - return result - - -def getActor(text): - # //*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text() - html = etree.fromstring(text, etree.HTMLParser()) - result = ( - str( - html.xpath( - "//td[contains(text(),'出演者')]/following-sibling::td/span/a/text()" - ) - ) - .strip(" ['']") - .replace("', '", ",") - ) - return result - - -def getStudio(text): - html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text() - try: - result = html.xpath( - "//td[contains(text(),'メーカー')]/following-sibling::td/a/text()" - )[0] - except: - result = html.xpath( - "//td[contains(text(),'メーカー')]/following-sibling::td/text()" - )[0] - return result - - -def getRuntime(text): - html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text() - result = html.xpath("//td[contains(text(),'収録時間')]/following-sibling::td/text()")[0] - return re.search(r"\d+", str(result)).group() - - -def getLabel(text): - html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text() - try: - result = html.xpath( - "//td[contains(text(),'シリーズ:')]/following-sibling::td/a/text()" - )[0] - except: - result = html.xpath( - "//td[contains(text(),'シリーズ:')]/following-sibling::td/text()" - )[0] - return result - - -def getNum(text): - html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text() - try: - result = html.xpath( - "//td[contains(text(),'品番:')]/following-sibling::td/a/text()" - )[0] - except: - result = html.xpath( - "//td[contains(text(),'品番:')]/following-sibling::td/text()" - )[0] - return result - - -def getYear(getRelease): - try: - result = str(re.search(r"\d{4}", getRelease).group()) - return result - except: - return getRelease - - -def getRelease(text): - html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text() - try: - result = html.xpath( - "//td[contains(text(),'発売日:')]/following-sibling::td/a/text()" - )[0].lstrip("\n") - except: - result = html.xpath( - "//td[contains(text(),'発売日:')]/following-sibling::td/text()" - )[0].lstrip("\n") - return result - - -def getTag(text): - html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text() - try: - result = html.xpath( - "//td[contains(text(),'ジャンル:')]/following-sibling::td/a/text()" - ) - except: - result = html.xpath( - "//td[contains(text(),'ジャンル:')]/following-sibling::td/text()" - ) - return result - - 
-def getCover(text, number): - html = etree.fromstring(text, etree.HTMLParser()) - cover_number = number - try: - result = html.xpath('//*[@id="' + cover_number + '"]/@href')[0] - except: - # sometimes fanza modify _ to \u0005f for image id - if "_" in cover_number: - cover_number = cover_number.replace("_", r"\u005f") - try: - result = html.xpath('//*[@id="' + cover_number + '"]/@href')[0] - except: - # (TODO) handle more edge case - # print(html) - # raise exception here, same behavior as before - # people's major requirement is fetching the picture - raise ValueError("can not find image") - return result - - -def getDirector(text): - html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text() - try: - result = html.xpath( - "//td[contains(text(),'監督:')]/following-sibling::td/a/text()" - )[0] - except: - result = html.xpath( - "//td[contains(text(),'監督:')]/following-sibling::td/text()" - )[0] - return result - - -def getOutline(text): - html = etree.fromstring(text, etree.HTMLParser()) - try: - result = str(html.xpath("//div[@class='mg-b20 lh4']/text()")[0]).replace( - "\n", "" - ) - if result == "": - result = str(html.xpath("//div[@class='mg-b20 lh4']//p/text()")[0]).replace( - "\n", "" - ) - except: - # (TODO) handle more edge case - # print(html) - return "" - return result - - -def main(number): - # fanza allow letter + number + underscore, normalize the input here - # @note: I only find the usage of underscore as h_test123456789 - fanza_search_number = number - # AV_Data_Capture.py.getNumber() over format the input, restore the h_ prefix - if fanza_search_number.startswith("h-"): - fanza_search_number = fanza_search_number.replace("h-", "h_") - - fanza_search_number = re.sub(r"[^0-9a-zA-Z_]", "", fanza_search_number).lower() - - fanza_urls = [ - "https://www.dmm.co.jp/digital/videoa/-/detail/=/cid=", - "https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=", - "https://www.dmm.co.jp/digital/anime/-/detail/=/cid=", - "https://www.dmm.co.jp/mono/anime/-/detail/=/cid=", - ] - chosen_url = "" - for url in fanza_urls: - chosen_url = url + fanza_search_number - htmlcode = get_html(chosen_url) - if "404 Not Found" not in htmlcode: - break - if "404 Not Found" in htmlcode: - return json.dumps({"title": "",}) - try: - # for some old page, the input number does not match the page - # for example, the url will be cid=test012 - # but the hinban on the page is test00012 - # so get the hinban first, and then pass it to following functions - fanza_hinban = getNum(htmlcode) - data = { - "title": getTitle(htmlcode).strip(getActor(htmlcode)), - "studio": getStudio(htmlcode), - "outline": getOutline(htmlcode), - "runtime": getRuntime(htmlcode), - "director": getDirector(htmlcode) if "anime" not in chosen_url else "", - "actor": getActor(htmlcode) if "anime" not in chosen_url else "", - "release": getRelease(htmlcode), - "number": fanza_hinban, - "cover": getCover(htmlcode, fanza_hinban), - "imagecut": 1, - "tag": getTag(htmlcode), - "label": getLabel(htmlcode), - "year": getYear( - getRelease(htmlcode) - ), # str(re.search('\d{4}',getRelease(a)).group()), - "actor_photo": "", - "website": chosen_url, - "source": "fanza.py", - } - except: - data = { - "title": "", - } - js = json.dumps( - data, ensure_ascii=False, sort_keys=True, indent=4, separators=(",", ":") - ) # .encode('UTF-8') - return js - - -if __name__ == "__main__": - # print(main("DV-1562")) - # input("[+][+]Press enter key exit, you can check the error messge before you exit.\n[+][+]按回车键结束,你可以在结束之前查看和错误信息。") - # 
print(main("ipx292")) - pass +#!/usr/bin/python3 +# -*- coding: utf-8 -*- +import json +import re + +from lxml import etree + +from ADC_function import * + +# import sys +# import io +# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True) + + +def getTitle(text): + html = etree.fromstring(text, etree.HTMLParser()) + result = html.xpath('//*[@id="title"]/text()')[0] + return result + + +def getActor(text): + # //*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text() + html = etree.fromstring(text, etree.HTMLParser()) + result = ( + str( + html.xpath( + "//td[contains(text(),'出演者')]/following-sibling::td/span/a/text()" + ) + ) + .strip(" ['']") + .replace("', '", ",") + ) + return result + + +def getStudio(text): + html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text() + try: + result = html.xpath( + "//td[contains(text(),'メーカー')]/following-sibling::td/a/text()" + )[0] + except: + result = html.xpath( + "//td[contains(text(),'メーカー')]/following-sibling::td/text()" + )[0] + return result + + +def getRuntime(text): + html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text() + result = html.xpath("//td[contains(text(),'収録時間')]/following-sibling::td/text()")[0] + return re.search(r"\d+", str(result)).group() + + +def getLabel(text): + html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text() + try: + result = html.xpath( + "//td[contains(text(),'シリーズ:')]/following-sibling::td/a/text()" + )[0] + except: + result = html.xpath( + "//td[contains(text(),'シリーズ:')]/following-sibling::td/text()" + )[0] + return result + + +def getNum(text): + html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text() + try: + result = html.xpath( + "//td[contains(text(),'品番:')]/following-sibling::td/a/text()" + )[0] + except: + result = html.xpath( + "//td[contains(text(),'品番:')]/following-sibling::td/text()" + )[0] + return result + + +def getYear(getRelease): + try: + result = str(re.search(r"\d{4}", getRelease).group()) + return result + except: + return getRelease + + +def getRelease(text): + html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text() + try: + result = html.xpath( + "//td[contains(text(),'発売日:')]/following-sibling::td/a/text()" + )[0].lstrip("\n") + except: + result = html.xpath( + "//td[contains(text(),'発売日:')]/following-sibling::td/text()" + )[0].lstrip("\n") + return result + + +def getTag(text): + html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text() + try: + result = html.xpath( + "//td[contains(text(),'ジャンル:')]/following-sibling::td/a/text()" + ) + except: + result = html.xpath( + "//td[contains(text(),'ジャンル:')]/following-sibling::td/text()" + ) + return result + + +def getCover(text, number): + html = etree.fromstring(text, etree.HTMLParser()) + cover_number = number + try: + result = html.xpath('//*[@id="' + cover_number + '"]/@href')[0] + except: + # sometimes fanza modify _ to \u0005f for image id + if "_" in cover_number: + cover_number = cover_number.replace("_", r"\u005f") + try: + result = html.xpath('//*[@id="' + cover_number + '"]/@href')[0] + except: + # (TODO) handle more edge case + # print(html) + # raise exception here, same behavior as before + # people's major requirement is fetching the picture + raise ValueError("can not find image") + return result + + +def getDirector(text): + html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text() + try: + result = html.xpath( + 
"//td[contains(text(),'監督:')]/following-sibling::td/a/text()" + )[0] + except: + result = html.xpath( + "//td[contains(text(),'監督:')]/following-sibling::td/text()" + )[0] + return result + + +def getOutline(text): + html = etree.fromstring(text, etree.HTMLParser()) + try: + result = str(html.xpath("//div[@class='mg-b20 lh4']/text()")[0]).replace( + "\n", "" + ) + if result == "": + result = str(html.xpath("//div[@class='mg-b20 lh4']//p/text()")[0]).replace( + "\n", "" + ) + except: + # (TODO) handle more edge case + # print(html) + return "" + return result + + +def main(number): + # fanza allow letter + number + underscore, normalize the input here + # @note: I only find the usage of underscore as h_test123456789 + fanza_search_number = number + # AV_Data_Capture.py.getNumber() over format the input, restore the h_ prefix + if fanza_search_number.startswith("h-"): + fanza_search_number = fanza_search_number.replace("h-", "h_") + + fanza_search_number = re.sub(r"[^0-9a-zA-Z_]", "", fanza_search_number).lower() + + fanza_urls = [ + "https://www.dmm.co.jp/digital/videoa/-/detail/=/cid=", + "https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=", + "https://www.dmm.co.jp/digital/anime/-/detail/=/cid=", + "https://www.dmm.co.jp/mono/anime/-/detail/=/cid=", + ] + chosen_url = "" + for url in fanza_urls: + chosen_url = url + fanza_search_number + htmlcode = get_html(chosen_url) + if "404 Not Found" not in htmlcode: + break + if "404 Not Found" in htmlcode: + return json.dumps({"title": "",}) + try: + # for some old page, the input number does not match the page + # for example, the url will be cid=test012 + # but the hinban on the page is test00012 + # so get the hinban first, and then pass it to following functions + fanza_hinban = getNum(htmlcode) + data = { + "title": getTitle(htmlcode).strip(getActor(htmlcode)), + "studio": getStudio(htmlcode), + "outline": getOutline(htmlcode), + "runtime": getRuntime(htmlcode), + "director": getDirector(htmlcode) if "anime" not in chosen_url else "", + "actor": getActor(htmlcode) if "anime" not in chosen_url else "", + "release": getRelease(htmlcode), + "number": fanza_hinban, + "cover": getCover(htmlcode, fanza_hinban), + "imagecut": 1, + "tag": getTag(htmlcode), + "label": getLabel(htmlcode), + "year": getYear( + getRelease(htmlcode) + ), # str(re.search('\d{4}',getRelease(a)).group()), + "actor_photo": "", + "website": chosen_url, + "source": "fanza.py", + } + except: + data = { + "title": "", + } + js = json.dumps( + data, ensure_ascii=False, sort_keys=True, indent=4, separators=(",", ":") + ) # .encode('UTF-8') + return js + + +if __name__ == "__main__": + # print(main("DV-1562")) + # input("[+][+]Press enter key exit, you can check the error messge before you exit.\n[+][+]按回车键结束,你可以在结束之前查看和错误信息。") + # print(main("ipx292")) + pass diff --git a/fc2fans_club.py b/SiteSource/fc2fans_club.py similarity index 97% rename from fc2fans_club.py rename to SiteSource/fc2fans_club.py index 3215e49..9dfeb24 100755 --- a/fc2fans_club.py +++ b/SiteSource/fc2fans_club.py @@ -1,162 +1,162 @@ -import re -from lxml import etree#need install -import json -import ADC_function -# import sys -# import io -# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True) - -def getTitle(htmlcode): #获取厂商 - #print(htmlcode) - html = etree.fromstring(htmlcode,etree.HTMLParser()) - result = str(html.xpath('/html/body/div[2]/div/div[1]/h3/text()')).strip(" ['']") - result2 = str(re.sub('\D{2}2-\d+','',result)).replace(' ','',1) - #print(result2) - return 
result2 -def getActor(htmlcode): - try: - html = etree.fromstring(htmlcode, etree.HTMLParser()) - result = str(html.xpath('/html/body/div[2]/div/div[1]/h5[5]/a/text()')).strip(" ['']") - return result - except: - return '' -def getStudio(htmlcode): #获取厂商 - html = etree.fromstring(htmlcode,etree.HTMLParser()) - result = str(html.xpath('/html/body/div[2]/div/div[1]/h5[3]/a[1]/text()')).strip(" ['']") - return result -def getNum(htmlcode): #获取番号 - html = etree.fromstring(htmlcode, etree.HTMLParser()) - result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[1]/span[2]/text()')).strip(" ['']") - #print(result) - return result -def getRelease(htmlcode2): # - #a=ADC_function.get_html('http://adult.contents.fc2.com/article_search.php?id='+str(number).lstrip("FC2-").lstrip("fc2-").lstrip("fc2_").lstrip("fc2-")+'&utm_source=aff_php&utm_medium=source_code&utm_campaign=from_aff_php') - html=etree.fromstring(htmlcode2,etree.HTMLParser()) - result = str(html.xpath('//*[@id="container"]/div[1]/div/article/section[1]/div/div[2]/dl/dd[4]/text()')).strip(" ['']") - return result -def getCover(htmlcode,number,htmlcode2): #获取厂商 # - #a = ADC_function.get_html('http://adult.contents.fc2.com/article_search.php?id=' + str(number).lstrip("FC2-").lstrip("fc2-").lstrip("fc2_").lstrip("fc2-") + '&utm_source=aff_php&utm_medium=source_code&utm_campaign=from_aff_php') - html = etree.fromstring(htmlcode2, etree.HTMLParser()) - result = str(html.xpath('//*[@id="container"]/div[1]/div/article/section[1]/div/div[1]/a/img/@src')).strip(" ['']") - if result == '': - html = etree.fromstring(htmlcode, etree.HTMLParser()) - result2 = str(html.xpath('//*[@id="slider"]/ul[1]/li[1]/img/@src')).strip(" ['']") - return 'https://fc2club.com' + result2 - return 'http:' + result -def getOutline(htmlcode2): #获取番号 # - html = etree.fromstring(htmlcode2, etree.HTMLParser()) - result = str(html.xpath('/html/body/div[1]/div[2]/div[2]/div[1]/div/article/section[4]/p/text()')).strip(" ['']").replace("\\n",'',10000).replace("'",'',10000).replace(', ,','').strip(' ').replace('。,',',') - return result -def getTag(htmlcode): #获取番号 - html = etree.fromstring(htmlcode, etree.HTMLParser()) - result = str(html.xpath('/html/body/div[2]/div/div[1]/h5[4]/a/text()')) - return result.strip(" ['']").replace("'",'').replace(' ','') -def getYear(release): - try: - result = re.search('\d{4}',release).group() - return result - except: - return '' - -def getTitle_fc2com(htmlcode): #获取厂商 - html = etree.fromstring(htmlcode,etree.HTMLParser()) - result = html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/h3/text()')[0] - return result -def getActor_fc2com(htmlcode): - try: - html = etree.fromstring(htmlcode, etree.HTMLParser()) - result = html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/ul/li[3]/a/text()')[0] - return result - except: - return '' -def getStudio_fc2com(htmlcode): #获取厂商 - try: - html = etree.fromstring(htmlcode, etree.HTMLParser()) - result = str(html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/ul/li[3]/a/text()')).strip(" ['']") - return result - except: - return '' -def getNum_fc2com(htmlcode): #获取番号 - html = etree.fromstring(htmlcode, etree.HTMLParser()) - result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[1]/span[2]/text()')).strip(" ['']") - return result -def getRelease_fc2com(htmlcode2): # - html=etree.fromstring(htmlcode2,etree.HTMLParser()) - result = str(html.xpath('//*[@id="container"]/div[1]/div/article/section[1]/div/div[2]/dl/dd[4]/text()')).strip(" ['']") - return result -def 
getCover_fc2com(htmlcode2): #获取厂商 # - html = etree.fromstring(htmlcode2, etree.HTMLParser()) - result = str(html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[1]/span/img/@src')).strip(" ['']") - return 'http:' + result -def getOutline_fc2com(htmlcode2): #获取番号 # - html = etree.fromstring(htmlcode2, etree.HTMLParser()) - result = str(html.xpath('/html/body/div/text()')).strip(" ['']").replace("\\n",'',10000).replace("'",'',10000).replace(', ,','').strip(' ').replace('。,',',') - return result -def getTag_fc2com(number): #获取番号 - htmlcode = str(bytes(ADC_function.get_html('http://adult.contents.fc2.com/api/v4/article/'+number+'/tag?'),'utf-8').decode('unicode-escape')) - result = re.findall('"tag":"(.*?)"', htmlcode) - return result -def getYear_fc2com(release): - try: - result = re.search('\d{4}',release).group() - return result - except: - return '' - -def main(number): - try: - htmlcode2 = ADC_function.get_html('https://adult.contents.fc2.com/article/'+number+'/') - htmlcode = ADC_function.get_html('https://fc2club.com//html/FC2-' + number + '.html') - actor = getActor(htmlcode) - if getActor(htmlcode) == '': - actor = 'FC2系列' - dic = { - 'title': getTitle(htmlcode), - 'studio': getStudio(htmlcode), - 'year': '',#str(re.search('\d{4}',getRelease(number)).group()), - 'outline': '',#getOutline(htmlcode2), - 'runtime': getYear(getRelease(htmlcode)), - 'director': getStudio(htmlcode), - 'actor': actor, - 'release': getRelease(number), - 'number': 'FC2-'+number, - 'label': '', - 'cover': getCover(htmlcode,number,htmlcode2), - 'imagecut': 0, - 'tag': getTag(htmlcode), - 'actor_photo':'', - 'website': 'https://fc2club.com//html/FC2-' + number + '.html', - 'source':'https://fc2club.com//html/FC2-' + number + '.html', - } - if dic['title'] == '': - htmlcode2 = ADC_function.get_html('https://adult.contents.fc2.com/article/' + number + '/',cookies={'wei6H':'1'}) - actor = getActor(htmlcode) - if getActor(htmlcode) == '': - actor = 'FC2系列' - dic = { - 'title': getTitle_fc2com(htmlcode2), - 'studio': getStudio_fc2com(htmlcode2), - 'year': '', # str(re.search('\d{4}',getRelease(number)).group()), - 'outline': getOutline_fc2com(htmlcode2), - 'runtime': getYear_fc2com(getRelease(htmlcode2)), - 'director': getStudio_fc2com(htmlcode2), - 'actor': actor, - 'release': getRelease_fc2com(number), - 'number': 'FC2-' + number, - 'cover': getCover_fc2com(htmlcode2), - 'imagecut': 0, - 'tag': getTag_fc2com(number), - 'label': '', - 'actor_photo': '', - 'website': 'http://adult.contents.fc2.com/article/' + number + '/', - 'source': 'http://adult.contents.fc2.com/article/' + number + '/', - } - except Exception as e: - # (TODO) better handle this - # print(e) - dic = {"title": ""} - js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'),)#.encode('UTF-8') - return js - - -#print(main('1252953')) +import re +from lxml import etree#need install +import json +import ADC_function +# import sys +# import io +# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True) + +def getTitle(htmlcode): #获取厂商 + #print(htmlcode) + html = etree.fromstring(htmlcode,etree.HTMLParser()) + result = str(html.xpath('/html/body/div[2]/div/div[1]/h3/text()')).strip(" ['']") + result2 = str(re.sub('\D{2}2-\d+','',result)).replace(' ','',1) + #print(result2) + return result2 +def getActor(htmlcode): + try: + html = etree.fromstring(htmlcode, etree.HTMLParser()) + result = str(html.xpath('/html/body/div[2]/div/div[1]/h5[5]/a/text()')).strip(" ['']") + return result + 
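# NOTE: the absolute xpath above is brittle; if fc2club changes its page
+    # layout the lookup silently yields '' and the except branch below kicks in.
+    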
except:
+        return ''
+def getStudio(htmlcode):  # get the studio
+    html = etree.fromstring(htmlcode,etree.HTMLParser())
+    result = str(html.xpath('/html/body/div[2]/div/div[1]/h5[3]/a[1]/text()')).strip(" ['']")
+    return result
+def getNum(htmlcode):  # get the product ID
+    html = etree.fromstring(htmlcode, etree.HTMLParser())
+    result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[1]/span[2]/text()')).strip(" ['']")
+    #print(result)
+    return result
+def getRelease(htmlcode2):  # get the release date
+    #a=ADC_function.get_html('http://adult.contents.fc2.com/article_search.php?id='+str(number).lstrip("FC2-").lstrip("fc2-").lstrip("fc2_").lstrip("fc2-")+'&utm_source=aff_php&utm_medium=source_code&utm_campaign=from_aff_php')
+    html=etree.fromstring(htmlcode2,etree.HTMLParser())
+    result = str(html.xpath('//*[@id="container"]/div[1]/div/article/section[1]/div/div[2]/dl/dd[4]/text()')).strip(" ['']")
+    return result
+def getCover(htmlcode,number,htmlcode2):  # get the cover URL
+    #a = ADC_function.get_html('http://adult.contents.fc2.com/article_search.php?id=' + str(number).lstrip("FC2-").lstrip("fc2-").lstrip("fc2_").lstrip("fc2-") + '&utm_source=aff_php&utm_medium=source_code&utm_campaign=from_aff_php')
+    html = etree.fromstring(htmlcode2, etree.HTMLParser())
+    result = str(html.xpath('//*[@id="container"]/div[1]/div/article/section[1]/div/div[1]/a/img/@src')).strip(" ['']")
+    if result == '':
+        html = etree.fromstring(htmlcode, etree.HTMLParser())
+        result2 = str(html.xpath('//*[@id="slider"]/ul[1]/li[1]/img/@src')).strip(" ['']")
+        return 'https://fc2club.com' + result2
+    return 'http:' + result
+def getOutline(htmlcode2):  # get the outline
+    html = etree.fromstring(htmlcode2, etree.HTMLParser())
+    result = str(html.xpath('/html/body/div[1]/div[2]/div[2]/div[1]/div/article/section[4]/p/text()')).strip(" ['']").replace("\\n",'',10000).replace("'",'',10000).replace(', ,','').strip(' ').replace('。,',',')
+    return result
+def getTag(htmlcode):  # get the tags
+    html = etree.fromstring(htmlcode, etree.HTMLParser())
+    result = str(html.xpath('/html/body/div[2]/div/div[1]/h5[4]/a/text()'))
+    return result.strip(" ['']").replace("'",'').replace(' ','')
+def getYear(release):
+    try:
+        result = re.search(r'\d{4}',release).group()
+        return result
+    except:
+        return ''
+
+def getTitle_fc2com(htmlcode):  # get the title (fc2.com layout)
+    html = etree.fromstring(htmlcode,etree.HTMLParser())
+    result = html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/h3/text()')[0]
+    return result
+def getActor_fc2com(htmlcode):
+    try:
+        html = etree.fromstring(htmlcode, etree.HTMLParser())
+        result = html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/ul/li[3]/a/text()')[0]
+        return result
+    except:
+        return ''
+def getStudio_fc2com(htmlcode):  # get the studio (fc2.com layout)
+    try:
+        html = etree.fromstring(htmlcode, etree.HTMLParser())
+        result = str(html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/ul/li[3]/a/text()')).strip(" ['']")
+        return result
+    except:
+        return ''
+def getNum_fc2com(htmlcode):  # get the product ID (fc2.com layout)
+    html = etree.fromstring(htmlcode, etree.HTMLParser())
+    result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[1]/span[2]/text()')).strip(" ['']")
+    return result
+def getRelease_fc2com(htmlcode2):  # get the release date (fc2.com layout)
+    html=etree.fromstring(htmlcode2,etree.HTMLParser())
+    result = str(html.xpath('//*[@id="container"]/div[1]/div/article/section[1]/div/div[2]/dl/dd[4]/text()')).strip(" ['']")
+    return result
+def getCover_fc2com(htmlcode2):  # get the cover URL (fc2.com layout)
+    html = etree.fromstring(htmlcode2, etree.HTMLParser())
+    result = str(html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[1]/span/img/@src')).strip(" ['']")
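+    # NOTE: fc2 serves protocol-relative image URLs (//host/path), which is
+    # why a literal 'http:' scheme is prefixed onto the result below.
+    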
return 'http:' + result
+def getOutline_fc2com(htmlcode2):  # get the outline (fc2.com layout)
+    html = etree.fromstring(htmlcode2, etree.HTMLParser())
+    result = str(html.xpath('/html/body/div/text()')).strip(" ['']").replace("\\n",'',10000).replace("'",'',10000).replace(', ,','').strip(' ').replace('。,',',')
+    return result
+def getTag_fc2com(number):  # get the tags via the fc2 tag API
+    htmlcode = str(bytes(ADC_function.get_html('http://adult.contents.fc2.com/api/v4/article/'+number+'/tag?'),'utf-8').decode('unicode-escape'))
+    result = re.findall('"tag":"(.*?)"', htmlcode)
+    return result
+def getYear_fc2com(release):
+    try:
+        result = re.search(r'\d{4}',release).group()
+        return result
+    except:
+        return ''
+
+def main(number):
+    try:
+        htmlcode2 = ADC_function.get_html('https://adult.contents.fc2.com/article/'+number+'/')
+        htmlcode = ADC_function.get_html('https://fc2club.com//html/FC2-' + number + '.html')
+        actor = getActor(htmlcode)
+        if getActor(htmlcode) == '':
+            actor = 'FC2系列'
+        dic = {
+            'title': getTitle(htmlcode),
+            'studio': getStudio(htmlcode),
+            'year': '',  # str(re.search(r'\d{4}',getRelease(htmlcode2)).group()),
+            'outline': '',  # getOutline(htmlcode2),
+            'runtime': getYear(getRelease(htmlcode)),
+            'director': getStudio(htmlcode),
+            'actor': actor,
+            'release': getRelease(htmlcode2),  # getRelease() parses page HTML; passing the bare number returned nothing
+            'number': 'FC2-'+number,
+            'label': '',
+            'cover': getCover(htmlcode,number,htmlcode2),
+            'imagecut': 0,
+            'tag': getTag(htmlcode),
+            'actor_photo':'',
+            'website': 'https://fc2club.com//html/FC2-' + number + '.html',
+            'source':'https://fc2club.com//html/FC2-' + number + '.html',
+        }
+        if dic['title'] == '':
+            htmlcode2 = ADC_function.get_html('https://adult.contents.fc2.com/article/' + number + '/',cookies={'wei6H':'1'})
+            actor = getActor(htmlcode)
+            if getActor(htmlcode) == '':
+                actor = 'FC2系列'
+            dic = {
+                'title': getTitle_fc2com(htmlcode2),
+                'studio': getStudio_fc2com(htmlcode2),
+                'year': '',  # str(re.search(r'\d{4}',getRelease(htmlcode2)).group()),
+                'outline': getOutline_fc2com(htmlcode2),
+                'runtime': getYear_fc2com(getRelease(htmlcode2)),
+                'director': getStudio_fc2com(htmlcode2),
+                'actor': actor,
+                'release': getRelease_fc2com(htmlcode2),  # likewise, pass the page HTML rather than the number
+                'number': 'FC2-' + number,
+                'cover': getCover_fc2com(htmlcode2),
+                'imagecut': 0,
+                'tag': getTag_fc2com(number),
+                'label': '',
+                'actor_photo': '',
+                'website': 'http://adult.contents.fc2.com/article/' + number + '/',
+                'source': 'http://adult.contents.fc2.com/article/' + number + '/',
+            }
+    except Exception as e:
+        # (TODO) better handle this
+        # print(e)
+        dic = {"title": ""}
+    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'),)#.encode('UTF-8')
+    return js
+
+
+#print(main('1252953'))
diff --git a/javbus.py b/SiteSource/javbus.py
similarity index 95%
rename from javbus.py
rename to SiteSource/javbus.py
index aa18d2a..ea06ac4 100755
--- a/javbus.py
+++ b/SiteSource/javbus.py
@@ -1,138 +1,139 @@
-import re
-from pyquery import PyQuery as pq#need install
-from lxml import etree#need install
-from bs4 import BeautifulSoup#need install
-import json
-from ADC_function import *
-
-def getActorPhoto(htmlcode): #//*[@id="star_qdt"]/li/a/img
-    soup = BeautifulSoup(htmlcode, 'lxml')
-    a = soup.find_all(attrs={'class': 'star-name'})
-    d={}
-    for i in a:
-        l=i.a['href']
-        t=i.get_text()
-        html = etree.fromstring(get_html(l), etree.HTMLParser())
-        p=str(html.xpath('//*[@id="waterfall"]/div[1]/div/div[1]/img/@src')).strip(" ['']")
-        p2={t:p}
-        d.update(p2)
-    return d
-def getTitle(htmlcode): #获取标题
-    doc = pq(htmlcode)
-    title=str(doc('div.container h3').text()).replace(' ','-')
-    
try: - title2 = re.sub('n\d+-','',title) - return title2 - except: - return title -def getStudio(htmlcode): #获取厂商 - html = etree.fromstring(htmlcode,etree.HTMLParser()) - result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[5]/a/text()')).strip(" ['']") - return result -def getYear(htmlcode): #获取年份 - html = etree.fromstring(htmlcode,etree.HTMLParser()) - result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[2]/text()')).strip(" ['']") - return result -def getCover(htmlcode): #获取封面链接 - doc = pq(htmlcode) - image = doc('a.bigImage') - return image.attr('href') -def getRelease(htmlcode): #获取出版日期 - html = etree.fromstring(htmlcode, etree.HTMLParser()) - result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[2]/text()')).strip(" ['']") - return result -def getRuntime(htmlcode): #获取分钟 - soup = BeautifulSoup(htmlcode, 'lxml') - a = soup.find(text=re.compile('分鐘')) - return a -def getActor(htmlcode): #获取女优 - b=[] - soup=BeautifulSoup(htmlcode,'lxml') - a=soup.find_all(attrs={'class':'star-name'}) - for i in a: - b.append(i.get_text()) - return b -def getNum(htmlcode): #获取番号 - html = etree.fromstring(htmlcode, etree.HTMLParser()) - result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[1]/span[2]/text()')).strip(" ['']") - return result -def getDirector(htmlcode): #获取导演 - html = etree.fromstring(htmlcode, etree.HTMLParser()) - result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/a/text()')).strip(" ['']") - return result -def getOutline(htmlcode): #获取演员 - doc = pq(htmlcode) - result = str(doc('tr td div.mg-b20.lh4 p.mg-b20').text()) - return result -def getSerise(htmlcode): - html = etree.fromstring(htmlcode, etree.HTMLParser()) - result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[7]/a/text()')).strip(" ['']") - return result -def getTag(htmlcode): # 获取演员 - tag = [] - soup = BeautifulSoup(htmlcode, 'lxml') - a = soup.find_all(attrs={'class': 'genre'}) - for i in a: - if 'onmouseout' in str(i): - continue - tag.append(i.get_text()) - return tag - - -def main(number): - try: - htmlcode = get_html('https://www.javbus.com/' + number) - try: - dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", '')) - except: - dww_htmlcode = '' - dic = { - 'title': str(re.sub('\w+-\d+-', '', getTitle(htmlcode))), - 'studio': getStudio(htmlcode), - 'year': str(re.search('\d{4}', getYear(htmlcode)).group()), - 'outline': getOutline(dww_htmlcode), - 'runtime': getRuntime(htmlcode), - 'director': getDirector(htmlcode), - 'actor': getActor(htmlcode), - 'release': getRelease(htmlcode), - 'number': getNum(htmlcode), - 'cover': getCover(htmlcode), - 'imagecut': 1, - 'tag': getTag(htmlcode), - 'label': getSerise(htmlcode), - 'actor_photo': getActorPhoto(htmlcode), - 'website': 'https://www.javbus.com/' + number, - 'source' : 'javbus.py', - } - js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8') - return js - except: - return main_uncensored(number) - -def main_uncensored(number): - htmlcode = get_html('https://www.javbus.com/' + number) - dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", '')) - if getTitle(htmlcode) == '': - htmlcode = get_html('https://www.javbus.com/' + number.replace('-','_')) - dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", '')) - dic = { - 'title': str(re.sub('\w+-\d+-','',getTitle(htmlcode))).replace(getNum(htmlcode)+'-',''), - 'studio': getStudio(htmlcode), - 'year': 
getYear(htmlcode),
-        'outline': getOutline(dww_htmlcode),
-        'runtime': getRuntime(htmlcode),
-        'director': getDirector(htmlcode),
-        'actor': getActor(htmlcode),
-        'release': getRelease(htmlcode),
-        'number': getNum(htmlcode),
-        'cover': getCover(htmlcode),
-        'tag': getTag(htmlcode),
-        'label': getSerise(htmlcode),
-        'imagecut': 0,
-        'actor_photo': '',
-        'website': 'https://www.javbus.com/' + number,
-        'source': 'javbus.py',
-    }
-    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
-    return js
-
+import re
+from pyquery import PyQuery as pq#need install
+from lxml import etree#need install
+from bs4 import BeautifulSoup#need install
+import json
+from ADC_function import *
+
+def getActorPhoto(htmlcode):  #//*[@id="star_qdt"]/li/a/img
+    soup = BeautifulSoup(htmlcode, 'lxml')
+    a = soup.find_all(attrs={'class': 'star-name'})
+    d={}
+    for i in a:
+        l=i.a['href']
+        t=i.get_text()
+        html = etree.fromstring(get_html(l), etree.HTMLParser())
+        p=str(html.xpath('//*[@id="waterfall"]/div[1]/div/div[1]/img/@src')).strip(" ['']")
+        p2={t:p}
+        d.update(p2)
+    return d
+def getTitle(htmlcode):  # get the title
+    doc = pq(htmlcode)
+    title=str(doc('div.container h3').text()).replace(' ','-')
+    try:
+        title2 = re.sub(r'n\d+-','',title)
+        return title2
+    except:
+        return title
+def getStudio(htmlcode):  # get the studio
+    html = etree.fromstring(htmlcode,etree.HTMLParser())
+    result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[5]/a/text()')).strip(" ['']")
+    return result
+def getYear(htmlcode):  # get the year
+    html = etree.fromstring(htmlcode,etree.HTMLParser())
+    result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[2]/text()')).strip(" ['']")
+    return result
+def getCover(htmlcode):  # get the cover URL
+    doc = pq(htmlcode)
+    image = doc('a.bigImage')
+    return image.attr('href')
+def getRelease(htmlcode):  # get the release date
+    html = etree.fromstring(htmlcode, etree.HTMLParser())
+    result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[2]/text()')).strip(" ['']")
+    return result
+def getRuntime(htmlcode):  # get the runtime in minutes
+    soup = BeautifulSoup(htmlcode, 'lxml')
+    a = soup.find(text=re.compile('分鐘'))
+    return a
+def getActor(htmlcode):  # get the actresses
+    b=[]
+    soup=BeautifulSoup(htmlcode,'lxml')
+    a=soup.find_all(attrs={'class':'star-name'})
+    for i in a:
+        b.append(i.get_text())
+    return b
+def getNum(htmlcode):  # get the product ID
+    html = etree.fromstring(htmlcode, etree.HTMLParser())
+    result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[1]/span[2]/text()')).strip(" ['']")
+    return result
+def getDirector(htmlcode):  # get the director
+    html = etree.fromstring(htmlcode, etree.HTMLParser())
+    result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/a/text()')).strip(" ['']")
+    return result
+def getOutline(htmlcode):  # get the outline
+    doc = pq(htmlcode)
+    result = str(doc('tr td div.mg-b20.lh4 p.mg-b20').text())
+    return result
+def getSerise(htmlcode):  # get the series/label
+    html = etree.fromstring(htmlcode, etree.HTMLParser())
+    result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[7]/a/text()')).strip(" ['']")
+    return result
+def getTag(htmlcode):  # get the genre tags
+    tag = []
+    soup = BeautifulSoup(htmlcode, 'lxml')
+    a = soup.find_all(attrs={'class': 'genre'})
+    for i in a:
+        if 'onmouseout' in str(i):
+            continue
+        tag.append(i.get_text())
+    return tag
+
+
+def main(number):
+    try:
+        htmlcode = get_html('https://www.javbus.com/' + number)
+        try:
+            dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
+        except:
+            dww_htmlcode = ''
+        dic = {
+            'title': str(re.sub(r'\w+-\d+-', '', getTitle(htmlcode))),
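+            # NOTE: the outline is taken from the dmm mirror page fetched
+            # above, since javbus itself does not appear to carry a synopsis.
+            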
'studio': getStudio(htmlcode), + 'year': str(re.search('\d{4}', getYear(htmlcode)).group()), + 'outline': getOutline(dww_htmlcode), + 'runtime': getRuntime(htmlcode), + 'director': getDirector(htmlcode), + 'actor': getActor(htmlcode), + 'release': getRelease(htmlcode), + 'number': getNum(htmlcode), + 'cover': getCover(htmlcode), + 'imagecut': 1, + 'tag': getTag(htmlcode), + 'label': getSerise(htmlcode), + 'actor_photo': getActorPhoto(htmlcode), + 'website': 'https://www.javbus.com/' + number, + 'source' : 'javbus.py', + } + js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8') + return js + except: + return main_uncensored(number) + + +def main_uncensored(number): # 无码 + htmlcode = get_html('https://www.javbus.com/' + number) + dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", '')) + if getTitle(htmlcode) == '': + htmlcode = get_html('https://www.javbus.com/' + number.replace('-','_')) + dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", '')) + dic = { + 'title': str(re.sub('\w+-\d+-', '', getTitle(htmlcode))).replace(getNum(htmlcode)+'-', ''), + 'studio': getStudio(htmlcode), + 'year': getYear(htmlcode), + 'outline': getOutline(dww_htmlcode), + 'runtime': getRuntime(htmlcode), + 'director': getDirector(htmlcode), + 'actor': getActor(htmlcode), + 'release': getRelease(htmlcode), + 'number': getNum(htmlcode), + 'cover': getCover(htmlcode), + 'tag': getTag(htmlcode), + 'label': getSerise(htmlcode), + 'imagecut': 0, + 'actor_photo': '', + 'website': 'https://www.javbus.com/' + number, + 'source': 'javbus.py', + } + js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8') + return js + diff --git a/javdb.py b/SiteSource/javdb.py similarity index 98% rename from javdb.py rename to SiteSource/javdb.py index 727c992..180602a 100755 --- a/javdb.py +++ b/SiteSource/javdb.py @@ -1,123 +1,123 @@ -import re -from lxml import etree -import json -from bs4 import BeautifulSoup -from ADC_function import * -# import sys -# import io -# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True) - -def getTitle(a): - html = etree.fromstring(a, etree.HTMLParser()) - result = html.xpath("/html/body/section/div/h2/strong/text()")[0] - return result -def getActor(a): # //*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text() - html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() - result1 = str(html.xpath('//strong[contains(text(),"演員")]/../following-sibling::span/text()')).strip(" ['']") - result2 = str(html.xpath('//strong[contains(text(),"演員")]/../following-sibling::span/a/text()')).strip(" ['']") - return str(result1 + result2).strip('+').replace(",\\xa0", "").replace("'", "").replace(' ', '').replace(',,', '').lstrip(',').replace(',', ', ') -def getActorPhoto(actor): #//*[@id="star_qdt"]/li/a/img - a = actor.split(',') - d={} - for i in a: - p={i:''} - d.update(p) - return d -def getStudio(a): - html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() - result1 = str(html.xpath('//strong[contains(text(),"片商")]/../following-sibling::span/text()')).strip(" ['']") - result2 = str(html.xpath('//strong[contains(text(),"片商")]/../following-sibling::span/a/text()')).strip(" ['']") - return str(result1 + result2).strip('+').replace("', '", '').replace('"', '') -def getRuntime(a): - html = etree.fromstring(a, etree.HTMLParser()) # 
//table/tr[1]/td[1]/text() - result1 = str(html.xpath('//strong[contains(text(),"時長")]/../following-sibling::span/text()')).strip(" ['']") - result2 = str(html.xpath('//strong[contains(text(),"時長")]/../following-sibling::span/a/text()')).strip(" ['']") - return str(result1 + result2).strip('+').rstrip('mi') -def getLabel(a): - html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() - result1 = str(html.xpath('//strong[contains(text(),"系列")]/../following-sibling::span/text()')).strip(" ['']") - result2 = str(html.xpath('//strong[contains(text(),"系列")]/../following-sibling::span/a/text()')).strip(" ['']") - return str(result1 + result2).strip('+').replace("', '", '').replace('"', '') -def getNum(a): - html = etree.fromstring(a, etree.HTMLParser()) - result1 = str(html.xpath('//strong[contains(text(),"番號")]/../following-sibling::span/text()')).strip(" ['']") - result2 = str(html.xpath('//strong[contains(text(),"番號")]/../following-sibling::span/a/text()')).strip(" ['']") - return str(result2 + result1).strip('+') -def getYear(getRelease): - try: - result = str(re.search('\d{4}', getRelease).group()) - return result - except: - return getRelease -def getRelease(a): - html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() - result1 = str(html.xpath('//strong[contains(text(),"時間")]/../following-sibling::span/text()')).strip(" ['']") - result2 = str(html.xpath('//strong[contains(text(),"時間")]/../following-sibling::span/a/text()')).strip(" ['']") - return str(result1 + result2).strip('+') -def getTag(a): - html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() - result1 = str(html.xpath('//strong[contains(text(),"类别")]/../following-sibling::span/text()')).strip(" ['']") - result2 = str(html.xpath('//strong[contains(text(),"类别")]/../following-sibling::span/a/text()')).strip(" ['']") - return str(result1 + result2).strip('+').replace(",\\xa0", "").replace("'", "").replace(' ', '').replace(',,', '').lstrip(',') -def getCover_small(a, index=0): - # same issue mentioned below, - # javdb sometime returns multiple results - # DO NOT just get the firt one, get the one with correct index number - html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() - result = html.xpath("//div[@class='item-image fix-scale-cover']/img/@src")[index] - if not 'https' in result: - result = 'https:' + result - return result -def getCover(htmlcode): - html = etree.fromstring(htmlcode, etree.HTMLParser()) - result = str(html.xpath("//div[@class='column column-video-cover']/a/img/@src")).strip(" ['']") - return result -def getDirector(a): - html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() - result1 = str(html.xpath('//strong[contains(text(),"導演")]/../following-sibling::span/text()')).strip(" ['']") - result2 = str(html.xpath('//strong[contains(text(),"導演")]/../following-sibling::span/a/text()')).strip(" ['']") - return str(result1 + result2).strip('+').replace("', '", '').replace('"', '') -def getOutline(htmlcode): - html = etree.fromstring(htmlcode, etree.HTMLParser()) - result = str(html.xpath('//*[@id="introduction"]/dd/p[1]/text()')).strip(" ['']") - return result -def main(number): - try: - number = number.upper() - query_result = get_html('https://javdb.com/search?q=' + number + '&f=all') - html = etree.fromstring(query_result, etree.HTMLParser()) # //table/tr[1]/td[1]/text() - # javdb sometime returns multiple results, - # and the first elememt maybe not the one we are looking for - # iterate all candidates and 
find the match one - urls = html.xpath('//*[@id="videos"]/div/div/a/@href') - ids =html.xpath('//*[@id="videos"]/div/div/a/div[contains(@class, "uid")]/text()') - correct_url = urls[ids.index(number)] - detail_page = get_html('https://javdb.com' + correct_url) - dic = { - 'actor': getActor(detail_page), - 'title': getTitle(detail_page), - 'studio': getStudio(detail_page), - 'outline': getOutline(detail_page), - 'runtime': getRuntime(detail_page), - 'director': getDirector(detail_page), - 'release': getRelease(detail_page), - 'number': getNum(detail_page), - 'cover': getCover(detail_page), - 'cover_small': getCover_small(query_result, index=ids.index(number)), - 'imagecut': 3, - 'tag': getTag(detail_page), - 'label': getLabel(detail_page), - 'year': getYear(getRelease(detail_page)), # str(re.search('\d{4}',getRelease(a)).group()), - 'actor_photo': getActorPhoto(getActor(detail_page)), - 'website': 'https://javdb.com' + correct_url, - 'source': 'javdb.py', - } - except Exception as e: - # print(e) - dic = {"title": ""} - js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8') - return js - -# main('DV-1562') -# input("[+][+]Press enter key exit, you can check the error messge before you exit.\n[+][+]按回车键结束,你可以在结束之前查看和错误信息。") -#print(main('ipx-292')) +import re +from lxml import etree +import json +from bs4 import BeautifulSoup +from ADC_function import * +# import sys +# import io +# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True) + +def getTitle(a): + html = etree.fromstring(a, etree.HTMLParser()) + result = html.xpath("/html/body/section/div/h2/strong/text()")[0] + return result +def getActor(a): # //*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text() + html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() + result1 = str(html.xpath('//strong[contains(text(),"演員")]/../following-sibling::span/text()')).strip(" ['']") + result2 = str(html.xpath('//strong[contains(text(),"演員")]/../following-sibling::span/a/text()')).strip(" ['']") + return str(result1 + result2).strip('+').replace(",\\xa0", "").replace("'", "").replace(' ', '').replace(',,', '').lstrip(',').replace(',', ', ') +def getActorPhoto(actor): #//*[@id="star_qdt"]/li/a/img + a = actor.split(',') + d={} + for i in a: + p={i:''} + d.update(p) + return d +def getStudio(a): + html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() + result1 = str(html.xpath('//strong[contains(text(),"片商")]/../following-sibling::span/text()')).strip(" ['']") + result2 = str(html.xpath('//strong[contains(text(),"片商")]/../following-sibling::span/a/text()')).strip(" ['']") + return str(result1 + result2).strip('+').replace("', '", '').replace('"', '') +def getRuntime(a): + html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() + result1 = str(html.xpath('//strong[contains(text(),"時長")]/../following-sibling::span/text()')).strip(" ['']") + result2 = str(html.xpath('//strong[contains(text(),"時長")]/../following-sibling::span/a/text()')).strip(" ['']") + return str(result1 + result2).strip('+').rstrip('mi') +def getLabel(a): + html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() + result1 = str(html.xpath('//strong[contains(text(),"系列")]/../following-sibling::span/text()')).strip(" ['']") + result2 = str(html.xpath('//strong[contains(text(),"系列")]/../following-sibling::span/a/text()')).strip(" ['']") + return str(result1 + result2).strip('+').replace("', '", 
'').replace('"', '')
+def getNum(a):
+    html = etree.fromstring(a, etree.HTMLParser())
+    result1 = str(html.xpath('//strong[contains(text(),"番號")]/../following-sibling::span/text()')).strip(" ['']")
+    result2 = str(html.xpath('//strong[contains(text(),"番號")]/../following-sibling::span/a/text()')).strip(" ['']")
+    return str(result2 + result1).strip('+')
+def getYear(getRelease):
+    try:
+        result = str(re.search(r'\d{4}', getRelease).group())
+        return result
+    except:
+        return getRelease
+def getRelease(a):
+    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
+    result1 = str(html.xpath('//strong[contains(text(),"時間")]/../following-sibling::span/text()')).strip(" ['']")
+    result2 = str(html.xpath('//strong[contains(text(),"時間")]/../following-sibling::span/a/text()')).strip(" ['']")
+    return str(result1 + result2).strip('+')
+def getTag(a):
+    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
+    result1 = str(html.xpath('//strong[contains(text(),"类别")]/../following-sibling::span/text()')).strip(" ['']")
+    result2 = str(html.xpath('//strong[contains(text(),"类别")]/../following-sibling::span/a/text()')).strip(" ['']")
+    return str(result1 + result2).strip('+').replace(",\\xa0", "").replace("'", "").replace(' ', '').replace(',,', '').lstrip(',')
+def getCover_small(a, index=0):
+    # same issue as described in main() below:
+    # javdb sometimes returns multiple results,
+    # so do NOT just take the first one; use the entry at the matching index.
+    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
+    result = html.xpath("//div[@class='item-image fix-scale-cover']/img/@src")[index]
+    if 'https' not in result:
+        result = 'https:' + result
+    return result
+def getCover(htmlcode):
+    html = etree.fromstring(htmlcode, etree.HTMLParser())
+    result = str(html.xpath("//div[@class='column column-video-cover']/a/img/@src")).strip(" ['']")
+    return result
+def getDirector(a):
+    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
+    result1 = str(html.xpath('//strong[contains(text(),"導演")]/../following-sibling::span/text()')).strip(" ['']")
+    result2 = str(html.xpath('//strong[contains(text(),"導演")]/../following-sibling::span/a/text()')).strip(" ['']")
+    return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
+def getOutline(htmlcode):
+    html = etree.fromstring(htmlcode, etree.HTMLParser())
+    result = str(html.xpath('//*[@id="introduction"]/dd/p[1]/text()')).strip(" ['']")
+    return result
+def main(number):
+    try:
+        number = number.upper()
+        query_result = get_html('https://javdb.com/search?q=' + number + '&f=all')
+        html = etree.fromstring(query_result, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
+        # javdb sometimes returns multiple results,
+        # and the first element may not be the one we are looking for,
+        # so iterate over all candidates and find the matching one
+        urls = html.xpath('//*[@id="videos"]/div/div/a/@href')
+        ids = html.xpath('//*[@id="videos"]/div/div/a/div[contains(@class, "uid")]/text()')
+        correct_url = urls[ids.index(number)]
+        detail_page = get_html('https://javdb.com' + correct_url)
+        dic = {
+            'actor': getActor(detail_page),
+            'title': getTitle(detail_page),
+            'studio': getStudio(detail_page),
+            'outline': getOutline(detail_page),
+            'runtime': getRuntime(detail_page),
+            'director': getDirector(detail_page),
+            'release': getRelease(detail_page),
+            'number': getNum(detail_page),
+            'cover': getCover(detail_page),
+            'cover_small': getCover_small(query_result, index=ids.index(number)),
+            'imagecut': 3,
+            'tag': 
getTag(detail_page), + 'label': getLabel(detail_page), + 'year': getYear(getRelease(detail_page)), # str(re.search('\d{4}',getRelease(a)).group()), + 'actor_photo': getActorPhoto(getActor(detail_page)), + 'website': 'https://javdb.com' + correct_url, + 'source': 'javdb.py', + } + except Exception as e: + # print(e) + dic = {"title": ""} + js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8') + return js + +# main('DV-1562') +# input("[+][+]Press enter key exit, you can check the error messge before you exit.\n[+][+]按回车键结束,你可以在结束之前查看和错误信息。") +#print(main('ipx-292')) diff --git a/mgstage.py b/SiteSource/mgstage.py similarity index 98% rename from mgstage.py rename to SiteSource/mgstage.py index 8e358c9..d1a8e95 100755 --- a/mgstage.py +++ b/SiteSource/mgstage.py @@ -1,108 +1,108 @@ -import re -from lxml import etree -import json -from bs4 import BeautifulSoup -from ADC_function import * -# import sys -# import io -# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True) - -def getTitle(a): - try: - html = etree.fromstring(a, etree.HTMLParser()) - result = str(html.xpath('//*[@id="center_column"]/div[1]/h1/text()')).strip(" ['']") - return result.replace('/', ',') - except: - return '' -def getActor(a): #//*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text() - html = etree.fromstring(a, etree.HTMLParser()) #//table/tr[1]/td[1]/text() - result1=str(html.xpath('//th[contains(text(),"出演:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n') - result2=str(html.xpath('//th[contains(text(),"出演:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n') - return str(result1+result2).strip('+').replace("', '",'').replace('"','').replace('/',',') -def getStudio(a): - html = etree.fromstring(a, etree.HTMLParser()) #//table/tr[1]/td[1]/text() - result1=str(html.xpath('//th[contains(text(),"シリーズ:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n') - result2=str(html.xpath('//th[contains(text(),"シリーズ:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n') - return str(result1+result2).strip('+').replace("', '",'').replace('"','') -def getRuntime(a): - html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() - result1 = str(html.xpath('//th[contains(text(),"収録時間:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n') - result2 = str(html.xpath('//th[contains(text(),"収録時間:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n') - return str(result1 + result2).strip('+').rstrip('mi') -def getLabel(a): - html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() - result1 = str(html.xpath('//th[contains(text(),"シリーズ:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip( - '\\n') - result2 = str(html.xpath('//th[contains(text(),"シリーズ:")]/../td/text()')).strip(" ['']").strip('\\n ').strip( - '\\n') - return str(result1 + result2).strip('+').replace("', '",'').replace('"','') -def getNum(a): - html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() - result1 = str(html.xpath('//th[contains(text(),"品番:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip( - '\\n') - result2 = str(html.xpath('//th[contains(text(),"品番:")]/../td/text()')).strip(" ['']").strip('\\n ').strip( - '\\n') - return str(result1 + result2).strip('+') -def getYear(getRelease): - try: - result = str(re.search('\d{4}',getRelease).group()) - return result - except: - return getRelease -def getRelease(a): - html = etree.fromstring(a, 
etree.HTMLParser()) # //table/tr[1]/td[1]/text() - result1 = str(html.xpath('//th[contains(text(),"配信開始日:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip( - '\\n') - result2 = str(html.xpath('//th[contains(text(),"配信開始日:")]/../td/text()')).strip(" ['']").strip('\\n ').strip( - '\\n') - return str(result1 + result2).strip('+') -def getTag(a): - html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() - result1 = str(html.xpath('//th[contains(text(),"ジャンル:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip( - '\\n') - result2 = str(html.xpath('//th[contains(text(),"ジャンル:")]/../td/text()')).strip(" ['']").strip('\\n ').strip( - '\\n') - return str(result1 + result2).strip('+').replace("', '\\n",",").replace("', '","").replace('"','') -def getCover(htmlcode): - html = etree.fromstring(htmlcode, etree.HTMLParser()) - result = str(html.xpath('//*[@id="center_column"]/div[1]/div[1]/div/div/h2/img/@src')).strip(" ['']") - # /html/body/div[2]/article[2]/div[1]/div[1]/div/div/h2/img/@src - return result -def getDirector(a): - html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() - result1 = str(html.xpath('//th[contains(text(),"シリーズ")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip( - '\\n') - result2 = str(html.xpath('//th[contains(text(),"シリーズ")]/../td/text()')).strip(" ['']").strip('\\n ').strip( - '\\n') - return str(result1 + result2).strip('+').replace("', '",'').replace('"','') -def getOutline(htmlcode): - html = etree.fromstring(htmlcode, etree.HTMLParser()) - result = str(html.xpath('//*[@id="introduction"]/dd/p[1]/text()')).strip(" ['']") - return result -def main(number2): - number=number2.upper() - htmlcode=str(get_html('https://www.mgstage.com/product/product_detail/'+str(number)+'/',cookies={'adc':'1'})) - soup = BeautifulSoup(htmlcode, 'lxml') - a = str(soup.find(attrs={'class': 'detail_data'})).replace('\n ','').replace(' ','').replace('\n ','').replace('\n ','') - dic = { - 'title': getTitle(htmlcode).replace("\\n",'').replace(' ',''), - 'studio': getStudio(a), - 'outline': getOutline(htmlcode), - 'runtime': getRuntime(a), - 'director': getDirector(a), - 'actor': getActor(a), - 'release': getRelease(a), - 'number': getNum(a), - 'cover': getCover(htmlcode), - 'imagecut': 0, - 'tag': getTag(a), - 'label':getLabel(a), - 'year': getYear(getRelease(a)), # str(re.search('\d{4}',getRelease(a)).group()), - 'actor_photo': '', - 'website':'https://www.mgstage.com/product/product_detail/'+str(number)+'/', - 'source': 'mgstage.py', - } - js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8') - return js - -#print(main('SIRO-3607')) +import re +from lxml import etree +import json +from bs4 import BeautifulSoup +from ADC_function import * +# import sys +# import io +# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True) + +def getTitle(a): + try: + html = etree.fromstring(a, etree.HTMLParser()) + result = str(html.xpath('//*[@id="center_column"]/div[1]/h1/text()')).strip(" ['']") + return result.replace('/', ',') + except: + return '' +def getActor(a): #//*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text() + html = etree.fromstring(a, etree.HTMLParser()) #//table/tr[1]/td[1]/text() + result1=str(html.xpath('//th[contains(text(),"出演:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n') + result2=str(html.xpath('//th[contains(text(),"出演:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n') + return 
str(result1+result2).strip('+').replace("', '",'').replace('"','').replace('/',',') +def getStudio(a): + html = etree.fromstring(a, etree.HTMLParser()) #//table/tr[1]/td[1]/text() + result1=str(html.xpath('//th[contains(text(),"シリーズ:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n') + result2=str(html.xpath('//th[contains(text(),"シリーズ:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n') + return str(result1+result2).strip('+').replace("', '",'').replace('"','') +def getRuntime(a): + html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() + result1 = str(html.xpath('//th[contains(text(),"収録時間:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n') + result2 = str(html.xpath('//th[contains(text(),"収録時間:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n') + return str(result1 + result2).strip('+').rstrip('mi') +def getLabel(a): + html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() + result1 = str(html.xpath('//th[contains(text(),"シリーズ:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip( + '\\n') + result2 = str(html.xpath('//th[contains(text(),"シリーズ:")]/../td/text()')).strip(" ['']").strip('\\n ').strip( + '\\n') + return str(result1 + result2).strip('+').replace("', '",'').replace('"','') +def getNum(a): + html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() + result1 = str(html.xpath('//th[contains(text(),"品番:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip( + '\\n') + result2 = str(html.xpath('//th[contains(text(),"品番:")]/../td/text()')).strip(" ['']").strip('\\n ').strip( + '\\n') + return str(result1 + result2).strip('+') +def getYear(getRelease): + try: + result = str(re.search('\d{4}',getRelease).group()) + return result + except: + return getRelease +def getRelease(a): + html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() + result1 = str(html.xpath('//th[contains(text(),"配信開始日:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip( + '\\n') + result2 = str(html.xpath('//th[contains(text(),"配信開始日:")]/../td/text()')).strip(" ['']").strip('\\n ').strip( + '\\n') + return str(result1 + result2).strip('+') +def getTag(a): + html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() + result1 = str(html.xpath('//th[contains(text(),"ジャンル:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip( + '\\n') + result2 = str(html.xpath('//th[contains(text(),"ジャンル:")]/../td/text()')).strip(" ['']").strip('\\n ').strip( + '\\n') + return str(result1 + result2).strip('+').replace("', '\\n",",").replace("', '","").replace('"','') +def getCover(htmlcode): + html = etree.fromstring(htmlcode, etree.HTMLParser()) + result = str(html.xpath('//*[@id="center_column"]/div[1]/div[1]/div/div/h2/img/@src')).strip(" ['']") + # /html/body/div[2]/article[2]/div[1]/div[1]/div/div/h2/img/@src + return result +def getDirector(a): + html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() + result1 = str(html.xpath('//th[contains(text(),"シリーズ")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip( + '\\n') + result2 = str(html.xpath('//th[contains(text(),"シリーズ")]/../td/text()')).strip(" ['']").strip('\\n ').strip( + '\\n') + return str(result1 + result2).strip('+').replace("', '",'').replace('"','') +def getOutline(htmlcode): + html = etree.fromstring(htmlcode, etree.HTMLParser()) + result = str(html.xpath('//*[@id="introduction"]/dd/p[1]/text()')).strip(" ['']") + return result +def main(number2): + number=number2.upper() + 
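# NOTE: the adc=1 cookie on the request below presumably satisfies
+    # mgstage's age-confirmation check; without it the site serves an
+    # interstitial page and the xpath lookups come back empty.
+    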
htmlcode=str(get_html('https://www.mgstage.com/product/product_detail/'+str(number)+'/',cookies={'adc':'1'})) + soup = BeautifulSoup(htmlcode, 'lxml') + a = str(soup.find(attrs={'class': 'detail_data'})).replace('\n ','').replace(' ','').replace('\n ','').replace('\n ','') + dic = { + 'title': getTitle(htmlcode).replace("\\n",'').replace(' ',''), + 'studio': getStudio(a), + 'outline': getOutline(htmlcode), + 'runtime': getRuntime(a), + 'director': getDirector(a), + 'actor': getActor(a), + 'release': getRelease(a), + 'number': getNum(a), + 'cover': getCover(htmlcode), + 'imagecut': 0, + 'tag': getTag(a), + 'label':getLabel(a), + 'year': getYear(getRelease(a)), # str(re.search('\d{4}',getRelease(a)).group()), + 'actor_photo': '', + 'website':'https://www.mgstage.com/product/product_detail/'+str(number)+'/', + 'source': 'mgstage.py', + } + js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8') + return js + +#print(main('SIRO-3607')) diff --git a/TestPathNFO.txt b/TestPathNFO.txt new file mode 100644 index 0000000..399647c --- /dev/null +++ b/TestPathNFO.txt @@ -0,0 +1,41 @@ +/Volumes/Adult/Files/ノ瀬アメリ/Tokyo Hot N0646.avi +/Volumes/Adult/Files/ノ瀬アメリ/MKBD_S03-MaRieS.mp4 +/Volumes/192.168.2.100/Adult/Files/Aki Sasaki Megapack/HODV-21299.mkv +/Volumes/Adult/Files/[Tokyo-Hot] [n1180] 美人秘書3穴串刺奉仕残業 (中井綾香 Ayaka Nakai)/(Tokyo-Hot)(n1180)美人秘書3穴串刺奉仕残業 中井綾香.mp4 +/mcdv47.avi +/mcdv-47.avi +/mcdv-047.mp4 +/mcdv047.mp4 +/mcdv0047.mp4 +/1pondo-070409_621.mp4 +/Volumes/Adult/Files/Kirara Asuka (@明日花キララ) FHD Pack Vol#1(181222)@RUNBKK/No-Watermarked/HOBD00015.FHD2.wmv +/Volumes/Adult/Files/(1.18TB) Julia movie pack collection Part 1/720p/RBD-406_1.mp4 +/Volumes/Adult/Files/(1.18TB) Julia movie pack collection Part 1/720p/MDYD-664B.mp4 +/Volumes/Adult/Files/107NTTR-037A.mp4 +/Volumes/Adult/Files/Yua.Mikami-PML/SNIS-986 国民的アイドル アドレナリン大爆発!禁欲1ヶ月後の性欲剥き出し焦らされトランスFUCK 三上悠亜【桃花族】.mp4 +/Volumes/Adult/Files/(1.18TB) Julia movie pack collection Part 2/FHD/UPSM-109_2.mkv +/Volumes/Adult/Files/Kirara Asuka (@明日花キララ) FHD Pack Vol#2(181231)@RUNBKK/No-Watermarked/PPT003.SD3.wmv +/Volumes/Adult/Files/波多野结衣/THE波多野結衣 ぶっかけ50連発! 
CD1.wmv +/Volumes/Adult/Files/波多野结衣/欲しがり 後編 波多野結衣.wmv +/Volumes/Adult/Files/波多野结衣/欲しがり 前編 波多野結衣.wmv +/Volumes/Adult/Files/波多野结衣/加勒比 062212-055 夫の目の前で妻が ~元上司に縛られて~波多野結衣~.rmvb +/Volumes/Adult/Files/波多野结衣/022213-271-carib-whole_s.mp4 +/Volumes/Adult/Files/SKYHD-001~010/SKYHD-009_H265.mkv +/Volumes/Adult/Files/大桥步兵合集/LAFBD-41.LaForet.Girl.41.angel.and.devil.Miku.Ohashi.2015.Bluray.1080p.x264.ac3-MTeam.mkv +/Volumes/Adult/Files/大桥步兵合集/032015_161-caribpr-high.mp4 +/Volumes/Adult/Files/桃谷绘里香(桃谷エリカ) 所有作品集合/118ppt00016hhb2.mkv +/Volumes/Adult/Files/tia/soe935C.HD.wmv +/Volumes/Adult/Files/SKYHD-011~020/SKYHD-020_H265.mkv +/Volumes/Adult/Files/RION(りおん).Utsunomiya.Shion.宇都宮しをん(うつのみやしをん)/VR/sivr00008_E.mp4 +/Volumes/Adult/Files/RION(りおん).Utsunomiya.Shion.宇都宮しをん(うつのみやしをん)/DMM.Video/onsd00899hhb3.mp4 +/Volumes/Adult/Files/Rating Top 30 JAV pack/SHKD-744 営業課長の湿ったパンスト 里美ゆりあ.mp4 +/Volumes/Adult/Files/Rating Top 30 JAV pack/ABP-627 裏・鈴村あいり-鈴村あいりのオトナの激情SEX4本番 鈴村あいり.MP4 +/Volumes/Adult/Files/Rating Top 30 JAV pack/20 ABP-408 上原瑞穂/上原瑞穂 ABP-408 无码流出片段/[ThZu.Cc]20150909164411.m2ts +/Volumes/Adult/Files/Caribbean-101717-520-HD/100917-515/100917-515-carib-1080p.mp4 +/Volumes/Adult/Files/ノ瀬アメリ/20081105栗栖エリカ - Sky Angel Blue 10 天舞超絕美少女天使降臨(skyhd010)(中文字幕).avi +/Volumes/Adult/Files/ノ瀬アメリ/一ノ瀬アメリ~加勒比 VERY SEXY.wmv +/Volumes/Adult/Files/ノ瀬アメリ/20101202一ノ瀬アメリ - 東京ブルドック05(inu006).avi +/Volumes/Adult/Files/ノ瀬アメリ/Sky Angel Vol 80 - CD2.mp4 +/Volumes/Adult/Files/Mika Sumire すみれ美香/Caribbean-091818-755.mp4 +/Volumes/Adult/Files/Takizawa Rola/[HD]abp-031C.wmv +/Volumes/Adult/Files/Takizawa Rola/ABP-013HDA.wmv \ No newline at end of file diff --git a/TestPathSpecial.txt b/TestPathSpecial.txt new file mode 100644 index 0000000..cc22544 --- /dev/null +++ b/TestPathSpecial.txt @@ -0,0 +1,51 @@ +/Volumes/192.168.2.100/Adult/Files/Aki Sasaki Megapack/HODV-21222.mkv +/Volumes/Adult/Files/ノ瀬アメリ/Tokyo Hot N0646.avi +/Volumes/Adult/Files/ノ瀬アメリ/MKBD_S03-MaRieS.mp4 +/Volumes/192.168.2.100/Adult/Files/RION(りおん).Utsunomiya.Shion.宇都宮しをん(うつのみやしをん)/DMM.Video/onsd00899hhb3.mp4 +/Volumes/192.168.2.100/Adult/Files/Rating Top 30 JAV pack/IPTD-999-1 彼女の姉貴とイケナイ関係 Rio.wmv +/Volumes/192.168.2.100/Adult/Files/Rating Top 30 JAV pack/IPTD-999A 彼女の姉貴とイケナイ関係 Rio.wmv +/Volumes/192.168.2.100/Adult/Files/Rating Top 30 JAV pack/IPTD-999-A 彼女の姉貴とイケナイ関係 Rio.wmv +/Volumes/192.168.2.100/Adult/Files/Rating Top 30 JAV pack/IPTD-999-C 彼女の姉貴とイケナイ関係 Rio.wmv +/Volumes/192.168.2.100/Adult/Files/Rating Top 30 JAV pack/IPTD-999-B 彼女の姉貴とイケナイ関係 Rio.wmv +/Volumes/192.168.2.100/Adult/Files/tia/soe935C.HD.wmv +/Volumes/192.168.2.100/Adult/Files/tia/soe935B.HD.wmv +/Volumes/192.168.2.100/Adult/Files/tia/soe935A.HD.wmv +/Volumes/192.168.2.100/Adult/Files/tia/soe935D.HD.wmv +/Volumes/Adult/Files/大桥步兵合集/LAFBD-41.LaForet.Girl.41.angel.and.devil.Miku.Ohashi.2015.Bluray.1080p.x264.ac3-MTeam.mkv +/Volumes/Adult/Files/[Tokyo-Hot] [n1180] 美人秘書3穴串刺奉仕残業 (中井綾香 Ayaka Nakai)/(Tokyo-Hot)(n1180)美人秘書3穴串刺奉仕残業 中井綾香.mp4 +/mcdv47.avi +/mcdv-47.avi +/mcdv-047.mp4 +/mcdv047.mp4 +/mcdv0047.mp4 +/1pondo-070409_621.mp4 +/Volumes/Adult/Files/Kirara Asuka (@明日花キララ) FHD Pack Vol#1(181222)@RUNBKK/No-Watermarked/HOBD00015.FHD2.wmv +/Volumes/Adult/Files/(1.18TB) Julia movie pack collection Part 1/720p/RBD-406_1.mp4 +/Volumes/Adult/Files/(1.18TB) Julia movie pack collection Part 1/720p/MDYD-664B.mp4 +/Volumes/Adult/Files/107NTTR-037A.mp4 +/Volumes/Adult/Files/Yua.Mikami-PML/SNIS-986 国民的アイドル アドレナリン大爆発!禁欲1ヶ月後の性欲剥き出し焦らされトランスFUCK 三上悠亜【桃花族】.mp4 +/Volumes/Adult/Files/(1.18TB) Julia movie pack collection Part 
2/FHD/UPSM-109_2.mkv +/Volumes/Adult/Files/Kirara Asuka (@明日花キララ) FHD Pack Vol#2(181231)@RUNBKK/No-Watermarked/PPT003.SD3.wmv +/Volumes/Adult/Files/波多野结衣/THE波多野結衣 ぶっかけ50連発! CD1.wmv +/Volumes/Adult/Files/波多野结衣/欲しがり 後編 波多野結衣.wmv +/Volumes/Adult/Files/波多野结衣/欲しがり 前編 波多野結衣.wmv +/Volumes/Adult/Files/波多野结衣/加勒比 062212-055 夫の目の前で妻が ~元上司に縛られて~波多野結衣~.rmvb +/Volumes/Adult/Files/波多野结衣/022213-271-carib-whole_s.mp4 +/Volumes/Adult/Files/SKYHD-001~010/SKYHD-009_H265.mkv +/Volumes/Adult/Files/大桥步兵合集/LAFBD-41.LaForet.Girl.41.angel.and.devil.Miku.Ohashi.2015.Bluray.1080p.x264.ac3-MTeam.mkv +/Volumes/Adult/Files/大桥步兵合集/032015_161-caribpr-high.mp4 +/Volumes/Adult/Files/桃谷绘里香(桃谷エリカ) 所有作品集合/118ppt00016hhb2.mkv +/Volumes/Adult/Files/SKYHD-011~020/SKYHD-020_H265.mkv +/Volumes/Adult/Files/RION(りおん).Utsunomiya.Shion.宇都宮しをん(うつのみやしをん)/VR/sivr00008_E.mp4 +/Volumes/Adult/Files/RION(りおん).Utsunomiya.Shion.宇都宮しをん(うつのみやしをん)/DMM.Video/onsd00899hhb3.mp4 +/Volumes/Adult/Files/Rating Top 30 JAV pack/SHKD-744 営業課長の湿ったパンスト 里美ゆりあ.mp4 +/Volumes/Adult/Files/Rating Top 30 JAV pack/ABP-627 裏・鈴村あいり-鈴村あいりのオトナの激情SEX4本番 鈴村あいり.MP4 +/Volumes/Adult/Files/Rating Top 30 JAV pack/20 ABP-408 上原瑞穂/上原瑞穂 ABP-408 无码流出片段/[ThZu.Cc]20150909164411.m2ts +/Volumes/Adult/Files/Caribbean-101717-520-HD/100917-515/100917-515-carib-1080p.mp4 +/Volumes/Adult/Files/ノ瀬アメリ/20081105栗栖エリカ - Sky Angel Blue 10 天舞超絕美少女天使降臨(skyhd010)(中文字幕).avi +/Volumes/Adult/Files/ノ瀬アメリ/一ノ瀬アメリ~加勒比 VERY SEXY.wmv +/Volumes/Adult/Files/ノ瀬アメリ/20101202一ノ瀬アメリ - 東京ブルドック05(inu006).avi +/Volumes/Adult/Files/ノ瀬アメリ/Sky Angel Vol 80 - CD2.mp4 +/Volumes/Adult/Files/Mika Sumire すみれ美香/Caribbean-091818-755.mp4 +/Volumes/Adult/Files/Takizawa Rola/[HD]abp-031C.wmv +/Volumes/Adult/Files/Takizawa Rola/ABP-013HDA.wmv \ No newline at end of file diff --git a/TestPaths.txt b/TestPaths.txt new file mode 100644 index 0000000..93ecbe0 --- /dev/null +++ b/TestPaths.txt @@ -0,0 +1,50 @@ +/Volumes/Adult/Files/Kirara Asuka (@明日花キララ) FHD Pack Vol#1(181222)@RUNBKK/No-Watermarked/HOBD00015.FHD2.wmv +/1pondo-070409_621.mp4 +/Volumes/Adult/Files/107NTTR-037.mp4 +/Volumes/Adult/Files/107NTTR-037A.mp4 +/Volumes/Adult/Files/Yua.Mikami-PML/TEK-097 ふたりは無敵.wmv +/Volumes/Adult/Files/Yua.Mikami-PML/SNIS-986 国民的アイドル アドレナリン大爆発!禁欲1ヶ月後の性欲剥き出し焦らされトランスFUCK 三上悠亜【桃花族】.mp4 +/Volumes/Adult/Files/Yua.Mikami-PML/SSNI-030 三上悠亜ファン感謝祭 国民的アイドル×一般ユーザー20人‘ガチファンとSEX解禁’ハメまくりスペシャル【桃花族】.mp4 +/Volumes/Adult/Files/(1.18TB) Julia movie pack collection Part 2/FHD/MIDD-893A.mkv +/Volumes/Adult/Files/(1.18TB) Julia movie pack collection Part 2/FHD/UPSM-109_2.mkv +/Volumes/Adult/Files/Kirara Asuka (@明日花キララ) FHD Pack Vol#2(181231)@RUNBKK/No-Watermarked/PPT003.SD3.wmv +/Volumes/Adult/Files/波多野结衣/THE波多野結衣 ぶっかけ50連発! 
CD1.wmv +/Volumes/Adult/Files/波多野结衣/欲しがり 後編 波多野結衣.wmv +/Volumes/Adult/Files/波多野结衣/欲しがり 前編 波多野結衣.wmv +/Volumes/Adult/Files/波多野结衣/加勒比 062212-055 夫の目の前で妻が ~元上司に縛られて~波多野結衣~.rmvb +/Volumes/Adult/Files/波多野结衣/022213-271-carib-whole_s.mp4 +/Volumes/Adult/Files/桜木凛 Rin Sakuragi FHD Collection Pack Vol/BBI-183.wmv +/Volumes/Adult/Files/NOP-019 芭蕾教室 水嶋あずみ/NOP019B.HD.wmv +/Volumes/Adult/Files/一ノ瀬アメリ part2/栗栖エリカ/20081105栗栖エリカ - Sky Angel Blue 10 天舞超絕美少女天使降臨(skyhd010)(中文字幕).avi +/Volumes/Adult/Files/一ノ瀬アメリ part2/Max Girls/Max Girls 24(xv804)伊東遥,Rio,小沢アリス,葉月しおり,一ノ瀬アメリ,ひなた結衣,藤崎りお.avi +/Volumes/Adult/Files/一ノ瀬アメリ part2/ノ瀬アメリAmeri Ichinose/20091127一ノ瀬アメリ - 一見面就做愛(xv801).avi +/Volumes/Adult/Files/Aki Sasaki Megapack/MSTG-003.mkv +/Volumes/Adult/Files/SKYHD-001~010/SKYHD-009_H265.mkv +/Volumes/Adult/Files/大桥步兵合集/LAFBD-41.LaForet.Girl.41.angel.and.devil.Miku.Ohashi.2015.Bluray.1080p.x264.ac3-MTeam.mkv +/Volumes/Adult/Files/大桥步兵合集/032015_161-caribpr-high.mp4 +/Volumes/Adult/Files/桃谷绘里香(桃谷エリカ) 所有作品集合/(PRESTIGE)(ABP-171)彼女のお姉さんは、誘惑ヤリたがり娘。桃谷エリカ.wmv +/Volumes/Adult/Files/桃谷绘里香(桃谷エリカ) 所有作品集合/(PRESTIGE)(ABP-145)濃密な接吻と欲情ベロキス性交 04 桃谷エリカ.wmv +/Volumes/Adult/Files/桃谷绘里香(桃谷エリカ) 所有作品集合/118ppt00016hhb2.mkv +/Volumes/Adult/Files/tia/soe935C.HD.wmv +/Volumes/Adult/Files/SKYHD-011~020/SKYHD-020_H265.mkv +/Volumes/Adult/Files/sakumomo1203-PML/IDBD-795 ももに夢中 2018年日本人にもっとも愛された女優桜空ももPREMIUM BOX8時間BEST.mp4 +/Volumes/Adult/Files/sakumomo1203-PML/IDBD-768 Gカップグラビアアイドル桜空もも初ベスト 原石 2【桃花族】.mp4 +/Volumes/Adult/Files/RION(りおん).Utsunomiya.Shion.宇都宮しをん(うつのみやしをん)/VR/sivr00008_E.mp4 +/Volumes/Adult/Files/RION(りおん).Utsunomiya.Shion.宇都宮しをん(うつのみやしをん)/DMM.Video/onsd00899hhb3.mp4 +/Volumes/Adult/Files/Rating Top 30 JAV pack/SHKD-744 営業課長の湿ったパンスト 里美ゆりあ.mp4 +/Volumes/Adult/Files/Rating Top 30 JAV pack/ABP-627 裏・鈴村あいり-鈴村あいりのオトナの激情SEX4本番 鈴村あいり.MP4 +/Volumes/Adult/Files/Rating Top 30 JAV pack/20 ABP-408 上原瑞穂/上原瑞穂 ABP-408 无码流出片段/[ThZu.Cc]20150909164411.m2ts +/Volumes/Adult/Files/Caribbean-101717-520-HD/100917-515/100917-515-carib-1080p.mp4 +/Volumes/Adult/Files/Kirara Asuka (@明日花キララ) FHD Pack Vol#3(190119)@RUNBKK/No-Watermarked/SOE976.FHD3.wmv +/Volumes/Adult/Files/(1.18TB) Julia movie pack collection Part 1/720p/RBD-406_1.mp4 +/Volumes/Adult/Files/(1.18TB) Julia movie pack collection Part 1/720p/MDYD-664B.mp4 +/Volumes/Adult/Files/ノ瀬アメリ/20081105栗栖エリカ - Sky Angel Blue 10 天舞超絕美少女天使降臨(skyhd010)(中文字幕).avi +/Volumes/Adult/Files/ノ瀬アメリ/一ノ瀬アメリ~加勒比 VERY SEXY.wmv +/Volumes/Adult/Files/ノ瀬アメリ/20101202一ノ瀬アメリ - 東京ブルドック05(inu006).avi +/Volumes/Adult/Files/ノ瀬アメリ/Sky Angel Vol 80 - CD2.mp4 +/Volumes/Adult/Files/ノ瀬アメリ/20100226一ノ瀬アメリ - OL Style 制服(xv827).avi +/Volumes/Adult/Files/Mika Sumire すみれ美香/Caribbean-091818-755.mp4 +/Volumes/Adult/Files/[Tokyo-Hot] [n1180] 美人秘書3穴串刺奉仕残業 (中井綾香 Ayaka Nakai)/(Tokyo-Hot)(n1180)美人秘書3穴串刺奉仕残業 中井綾香.mp4 +/Volumes/Adult/Files/Takizawa Rola/[HD]abp-031C.wmv +/Volumes/Adult/Files/Takizawa Rola/ABP-013HDA.wmv +/Volumes/Adult/Files/Uncensored Mosaic Removal Megapack/ADN-017(Asami Ogawa).mp4 \ No newline at end of file diff --git a/config.ini b/config.ini old mode 100644 new mode 100755 index f2ac60d..a017bab --- a/config.ini +++ b/config.ini @@ -1,28 +1,35 @@ [common] -main_mode=1 -failed_output_folder=failed -success_output_folder=JAV_output +main_mode=2 +# 路径均为绝对路径,不要写入" '等符号 +search_folder= /Volumes/192.168.2.100/Adult/AVTest +# 如果failed_output_folder 为空,抓取不到相关信息的视频将不回移动 +failed_output_folder= /Volumes/192.168.2.100/Adult/UnknownStars +success_output_folder= /Volumes/192.168.2.100/Adult/Files +#临时资源存储路径,比如xxx.nfo 海报图 +temp_folder= 
/Volumes/192.168.2.100/Adult/temp +# 如果是远程挂载的盘符,建议不开启创建软连接:软连接链接的是绝对路径,远程NAS上的路径和本地挂载的路径一般不同。 soft_link=0 [proxy] -proxy=127.0.0.1:1080 -timeout=10 -retry=3 +#例子为socks代理配置,可以 =后留空 +proxy= socks5h://127.0.0.1:1081 +timeout= 10 +retry= 5 [Name_Rule] -location_rule=actor+'/'+number -naming_rule=number+'-'+title +location_rule= actor+'/'+number +naming_rule= number+'-'+title [update] update_check=1 [media] -media_warehouse=emby #emby or plex or kodi ,emby=jellyfin +media_warehouse=EMBY [escape] literals=\() -folders=failed,JAV_output +folders=/Volumes/Adult/UnknownStars,/Volumes/Adult/Stars [debug_mode] -switch=1 \ No newline at end of file +switch=1 diff --git a/core.py b/core.py index 5b47d6b..23e1237 100755 --- a/core.py +++ b/core.py @@ -1,691 +1,918 @@ -# -*- coding: utf-8 -*- - -import re -import os -import os.path -import shutil -from PIL import Image -import time -import json -from ADC_function import * -from configparser import ConfigParser -import argparse -# =========website======== -import fc2fans_club -import mgstage -import avsox -import javbus -import javdb -import fanza -import jav321 -import requests - - -# =====================本地文件处理=========================== - -def escapePath(path, Config): # Remove escape literals - escapeLiterals = Config['escape']['literals'] - backslash = '\\' - for literal in escapeLiterals: - path = path.replace(backslash + literal, '') - return path - - -def moveFailedFolder(filepath, failed_folder): - print('[-]Move to Failed output folder') - shutil.move(filepath, str(os.getcwd()) + '/' + failed_folder + '/') - return - - -def CreatFailedFolder(failed_folder): - if not os.path.exists(failed_folder + '/'): # 新建failed文件夹 - try: - os.makedirs(failed_folder + '/') - except: - print("[-]failed!can not be make Failed output folder\n[-](Please run as Administrator)") - return - - -def getDataFromJSON(file_number, filepath, failed_folder): # 从JSON返回元数据 - """ - iterate through all services and fetch the data - """ - - func_mapping = { - "avsox": avsox.main, - "fc2": fc2fans_club.main, - "fanza": fanza.main, - "javdb": javdb.main, - "javbus": javbus.main, - "mgstage": mgstage.main, - "jav321": jav321.main, - } - - # default fetch order list, from the begining to the end - sources = ["javbus", "javdb", "fanza", "mgstage", "fc2", "avsox", "jav321"] - - # if the input file name matches centain rules, - # move some web service to the begining of the list - if re.match(r"^\d{5,}", file_number) or ( - "HEYZO" in file_number or "heyzo" in file_number or "Heyzo" in file_number - ): - sources.insert(0, sources.pop(sources.index("avsox"))) - elif re.match(r"\d+\D+", file_number) or ( - "siro" in file_number or "SIRO" in file_number or "Siro" in file_number - ): - sources.insert(0, sources.pop(sources.index("mgstage"))) - elif "fc2" in file_number or "FC2" in file_number: - sources.insert(0, sources.pop(sources.index("fc2"))) - - for source in sources: - json_data = json.loads(func_mapping[source](file_number)) - # if any service return a valid return, break - if getDataState(json_data) != 0: - break - - # ================================================网站规则添加结束================================================ - - title = json_data['title'] - actor_list = str(json_data['actor']).strip("[ ]").replace("'", '').split(',') # 字符串转列表 - release = json_data['release'] - number = json_data['number'] - studio = json_data['studio'] - source = json_data['source'] - runtime = json_data['runtime'] - outline = json_data['runtime'] - label = json_data['label'] - year = 
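
A note on the [proxy] change above: the value is now a scheme-qualified URL
(socks5h://127.0.0.1:1081), while the old helpers prepended "http://"
themselves. With the scheme kept in the config, the dict can be passed straight
to requests — a sketch, assuming PySocks (requests[socks]) is installed so that
socks5h resolves hostnames through the proxy:

    proxy_dict = {"http": config.proxy, "https": config.proxy} if config.proxy else None
    r = requests.get(url, proxies=proxy_dict, timeout=config.timeout)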
json_data['year'] - try: - cover_small = json_data['cover_small'] - except: - cover_small = '' - imagecut = json_data['imagecut'] - tag = str(json_data['tag']).strip("[ ]").replace("'", '').replace(" ", '').split(',') # 字符串转列表 @ - actor = str(actor_list).strip("[ ]").replace("'", '').replace(" ", '') - - - if title == '' or number == '': - print('[-]Movie Data not found!') - moveFailedFolder(filepath, failed_folder) - return - - # if imagecut == '3': - # DownloadFileWithFilename() - - # ====================处理异常字符====================== #\/:*?"<>| - title = title.replace('\\', '') - title = title.replace('/', '') - title = title.replace(':', '') - title = title.replace('*', '') - title = title.replace('?', '') - title = title.replace('"', '') - title = title.replace('<', '') - title = title.replace('>', '') - title = title.replace('|', '') - release = release.replace('/', '-') - tmpArr = cover_small.split(',') - if len(tmpArr) > 0: - cover_small = tmpArr[0].strip('\"').strip('\'') - # ====================处理异常字符 END================== #\/:*?"<>| - - naming_rule = eval(config['Name_Rule']['naming_rule']) - location_rule = eval(config['Name_Rule']['location_rule']) - - # 返回处理后的json_data - json_data['title'] = title - json_data['actor'] = actor - json_data['release'] = release - json_data['cover_small'] = cover_small - json_data['tag'] = tag - json_data['naming_rule'] = naming_rule - json_data['location_rule'] = location_rule - json_data['year'] = year - return json_data - - -def get_info(json_data): # 返回json里的数据 - title = json_data['title'] - studio = json_data['studio'] - year = json_data['year'] - outline = json_data['outline'] - runtime = json_data['runtime'] - director = json_data['director'] - actor_photo = json_data['actor_photo'] - release = json_data['release'] - number = json_data['number'] - cover = json_data['cover'] - website = json_data['website'] - return title, studio, year, outline, runtime, director, actor_photo, release, number, cover, website - - -def smallCoverCheck(path, number, imagecut, cover_small, c_word, option, Config, filepath, failed_folder): - if imagecut == 3: - if option == 'emby': - DownloadFileWithFilename(cover_small, '1.jpg', path, Config, filepath, failed_folder) - try: - img = Image.open(path + '/1.jpg') - except Exception: - img = Image.open('1.jpg') - w = img.width - h = img.height - img.save(path + '/' + number + c_word + '.png') - time.sleep(1) - os.remove(path + '/1.jpg') - if option == 'kodi': - DownloadFileWithFilename(cover_small, '1.jpg', path, Config, filepath, failed_folder) - try: - img = Image.open(path + '/1.jpg') - except Exception: - img = Image.open('1.jpg') - w = img.width - h = img.height - img.save(path + '/' + number + c_word + '-poster.jpg') - time.sleep(1) - os.remove(path + '/1.jpg') - if option == 'plex': - DownloadFileWithFilename(cover_small, '1.jpg', path, Config, filepath, failed_folder) - try: - img = Image.open(path + '/1.jpg') - except Exception: - img = Image.open('1.jpg') - w = img.width - h = img.height - img.save(path + '/poster.jpg') - os.remove(path + '/1.jpg') - - -def creatFolder(success_folder, location_rule, json_data, Config): # 创建文件夹 - title, studio, year, outline, runtime, director, actor_photo, release, number, cover, website = get_info(json_data) - if len(location_rule) > 240: # 新建成功输出文件夹 - path = success_folder + '/' + location_rule.replace("'actor'", "'manypeople'", 3).replace("actor", - "'manypeople'", - 3) # path为影片+元数据所在目录 - else: - path = success_folder + '/' + location_rule - # print(path) - if not 
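
The chain of nine title.replace(...) calls above strips the characters Windows
forbids in filenames one at a time; a single character class does the same in
one pass, which is the approach the rewritten core.py below takes:

    title = re.sub(r'[\\/:*?"<>|]', '', title)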
os.path.exists(path): - path = escapePath(path, Config) - try: - os.makedirs(path) - except: - path = success_folder + '/' + location_rule.replace('/[' + number + ']-' + title, "/number") - path = escapePath(path, Config) - os.makedirs(path) - return path - - -# =====================资源下载部分=========================== -def DownloadFileWithFilename(url, filename, path, Config, filepath, failed_folder): # path = examle:photo , video.in the Project Folder! - proxy, timeout, retry_count = get_network_settings() - i = 0 - - while i < retry_count: - try: - if not proxy == '': - if not os.path.exists(path): - os.makedirs(path) - headers = { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'} - r = requests.get(url, headers=headers, timeout=timeout, - proxies={"http": "http://" + str(proxy), "https": "https://" + str(proxy)}) - if r == '': - print('[-]Movie Data not found!') - return - with open(str(path) + "/" + filename, "wb") as code: - code.write(r.content) - return - else: - if not os.path.exists(path): - os.makedirs(path) - headers = { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'} - r = requests.get(url, timeout=timeout, headers=headers) - if r == '': - print('[-]Movie Data not found!') - return - with open(str(path) + "/" + filename, "wb") as code: - code.write(r.content) - return - except requests.exceptions.RequestException: - i += 1 - print('[-]Image Download : Connect retry ' + str(i) + '/' + str(retry_count)) - except requests.exceptions.ConnectionError: - i += 1 - print('[-]Image Download : Connect retry ' + str(i) + '/' + str(retry_count)) - except requests.exceptions.ProxyError: - i += 1 - print('[-]Image Download : Connect retry ' + str(i) + '/' + str(retry_count)) - except requests.exceptions.ConnectTimeout: - i += 1 - print('[-]Image Download : Connect retry ' + str(i) + '/' + str(retry_count)) - print('[-]Connect Failed! Please check your Proxy or Network!') - moveFailedFolder(filepath, failed_folder) - return - - -def imageDownload(option, cover, number, c_word, path, multi_part, Config, filepath, failed_folder): # 封面是否下载成功,否则移动到failed - if option == 'emby': - if DownloadFileWithFilename(cover, number + c_word + '.jpg', path, Config, filepath, failed_folder) == 'failed': - moveFailedFolder(filepath, failed_folder) - return - DownloadFileWithFilename(cover, number + c_word + '.jpg', path, Config, filepath, failed_folder) - if not os.path.getsize(path + '/' + number + c_word + '.jpg') == 0: - print('[+]Image Downloaded!', path + '/' + number + c_word + '.jpg') - return - i = 1 - while i <= int(config['proxy']['retry']): - if os.path.getsize(path + '/' + number + c_word + '.jpg') == 0: - print('[!]Image Download Failed! Trying again. 
[' + config['proxy']['retry'] + '/3]') - DownloadFileWithFilename(cover, number + c_word + '.jpg', path, Config, filepath, failed_folder) - i = i + 1 - continue - else: - break - if multi_part == 1: - old_name = os.path.join(path, number + c_word + '.jpg') - new_name = os.path.join(path, number + c_word + '.jpg') - os.rename(old_name, new_name) - print('[+]Image Downloaded!', path + '/' + number + c_word + '.jpg') - else: - print('[+]Image Downloaded!', path + '/' + number + c_word + '.jpg') - elif option == 'plex': - if DownloadFileWithFilename(cover, 'fanart.jpg', path, Config, filepath, failed_folder) == 'failed': - moveFailedFolder(filepath, failed_folder) - return - DownloadFileWithFilename(cover, 'fanart.jpg', path, Config, filepath, failed_folder) - if not os.path.getsize(path + '/fanart.jpg') == 0: - print('[+]Image Downloaded!', path + '/fanart.jpg') - return - i = 1 - while i <= int(config['proxy']['retry']): - if os.path.getsize(path + '/fanart.jpg') == 0: - print('[!]Image Download Failed! Trying again. [' + config['proxy']['retry'] + '/3]') - DownloadFileWithFilename(cover, 'fanart.jpg', path, Config, filepath, failed_folder) - i = i + 1 - continue - else: - break - if not os.path.getsize(path + '/' + number + c_word + '.jpg') == 0: - print('[!]Image Download Failed! Trying again.') - DownloadFileWithFilename(cover, number + c_word + '.jpg', path, Config, filepath, failed_folder) - print('[+]Image Downloaded!', path + '/fanart.jpg') - elif option == 'kodi': - if DownloadFileWithFilename(cover, number + c_word + '-fanart.jpg', path, Config, filepath, failed_folder) == 'failed': - moveFailedFolder(filepath, failed_folder) - return - DownloadFileWithFilename(cover, number + c_word + '-fanart.jpg', path, Config, filepath, failed_folder) - if not os.path.getsize(path + '/' + number + c_word + '-fanart.jpg') == 0: - print('[+]Image Downloaded!', path + '/' + number + c_word + '-fanart.jpg') - return - i = 1 - while i <= int(config['proxy']['retry']): - if os.path.getsize(path + '/' + number + c_word + '-fanart.jpg') == 0: - print('[!]Image Download Failed! Trying again. 
[' + config['proxy']['retry'] + '/3]') - DownloadFileWithFilename(cover, number + c_word + '-fanart.jpg', path, Config, filepath, failed_folder) - i = i + 1 - continue - else: - break - print('[+]Image Downloaded!', path + '/' + number + c_word + '-fanart.jpg') - - -def PrintFiles(option, path, c_word, naming_rule, part, cn_sub, json_data, filepath, failed_folder, tag): - title, studio, year, outline, runtime, director, actor_photo, release, number, cover, website = get_info(json_data) - try: - if not os.path.exists(path): - os.makedirs(path) - if option == 'plex': - with open(path + "/" + number + c_word + ".nfo", "wt", encoding='UTF-8') as code: - print('', file=code) - print("", file=code) - print(" " + naming_rule + part + "", file=code) - print(" ", file=code) - print(" ", file=code) - print(" " + studio + "+", file=code) - print(" " + year + "", file=code) - print(" " + outline + "", file=code) - print(" " + outline + "", file=code) - print(" " + str(runtime).replace(" ", "") + "", file=code) - print(" " + director + "", file=code) - print(" poster.jpg", file=code) - print(" thumb.png", file=code) - print(" fanart.jpg", file=code) - try: - for key, value in actor_photo.items(): - print(" ", file=code) - print(" " + key + "", file=code) - if not value == '': # or actor_photo == []: - print(" " + value + "", file=code) - print(" ", file=code) - except: - aaaa = '' - print(" " + studio + "", file=code) - print(" ", file=code) - if cn_sub == '1': - print(" 中文字幕", file=code) - try: - for i in str(json_data['tag']).strip("[ ]").replace("'", '').replace(" ", '').split(','): - print(" " + i + "", file=code) - except: - aaaaa = '' - try: - for i in str(json_data['tag']).strip("[ ]").replace("'", '').replace(" ", '').split(','): - print(" " + i + "", file=code) - except: - aaaaaaaa = '' - if cn_sub == '1': - print(" 中文字幕", file=code) - print(" " + number + "", file=code) - print(" " + release + "", file=code) - print(" " + cover + "", file=code) - print(" " + website + "", file=code) - print("", file=code) - print("[+]Writeed! " + path + "/" + number + ".nfo") - elif option == 'emby': - with open(path + "/" + number + c_word + ".nfo", "wt", encoding='UTF-8') as code: - print('', file=code) - print("", file=code) - print(" " + naming_rule + part + "", file=code) - print(" ", file=code) - print(" ", file=code) - print(" " + studio + "+", file=code) - print(" " + year + "", file=code) - print(" " + outline + "", file=code) - print(" " + outline + "", file=code) - print(" " + str(runtime).replace(" ", "") + "", file=code) - print(" " + director + "", file=code) - print(" " + number + c_word + ".png", file=code) - print(" " + number + c_word + ".png", file=code) - print(" " + number + c_word + '.jpg' + "", file=code) - try: - for key, value in actor_photo.items(): - print(" ", file=code) - print(" " + key + "", file=code) - if not value == '': # or actor_photo == []: - print(" " + value + "", file=code) - print(" ", file=code) - except: - aaaa = '' - print(" " + studio + "", file=code) - print(" ", file=code) - if cn_sub == '1': - print(" 中文字幕", file=code) - try: - for i in tag: - print(" " + i + "", file=code) - except: - aaaaa = '' - try: - for i in tag: - print(" " + i + "", file=code) - except: - aaaaaaaa = '' - if cn_sub == '1': - print(" 中文字幕", file=code) - print(" " + number + "", file=code) - print(" " + release + "", file=code) - print(" " + cover + "", file=code) - print(" " + website + "", file=code) - print("", file=code) - print("[+]Writeed! 
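
(The quoted "" fragments in these nfo writers originally carried XML tag
literals that this rendering has stripped; judging from the value printed on
each line, the opening lines were most likely of the form:

    print('<?xml version="1.0" encoding="UTF-8" ?>', file=code)
    print("<movie>", file=code)
    print("  <title>" + naming_rule + part + "</title>", file=code)
    print("  <year>" + year + "</year>", file=code)

with matching elements for plot/outline, runtime, director, poster/fanart,
actor, genre/tag, num, release, cover and website, closed by </movie>.)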
" + path + "/" + number + c_word + ".nfo") - elif option == 'kodi': - with open(path + "/" + number + c_word + ".nfo", "wt", encoding='UTF-8') as code: - print('', file=code) - print("", file=code) - print(" " + naming_rule + part + "", file=code) - print(" ", file=code) - print(" ", file=code) - print(" " + studio + "+", file=code) - print(" " + year + "", file=code) - print(" " + outline + "", file=code) - print(" " + outline + "", file=code) - print(" " + str(runtime).replace(" ", "") + "", file=code) - print(" " + director + "", file=code) - print(" " + number + c_word + "-poster.jpg", file=code) - print(" " + number + c_word + '-fanart.jpg' + "", file=code) - try: - for key, value in actor_photo.items(): - print(" ", file=code) - print(" " + key + "", file=code) - if not value == '': # or actor_photo == []: - print(" " + value + "", file=code) - print(" ", file=code) - except: - aaaa = '' - print(" " + studio + "", file=code) - print(" ", file=code) - if cn_sub == '1': - print(" 中文字幕", file=code) - try: - for i in tag: - print(" " + i + "", file=code) - except: - aaaaa = '' - try: - for i in tag: - print(" " + i + "", file=code) - except: - aaaaaaaa = '' - if cn_sub == '1': - print(" 中文字幕", file=code) - print(" " + number + "", file=code) - print(" " + release + "", file=code) - print(" " + cover + "", file=code) - print(" " + website + "", file=code) - print("", file=code) - print("[+]Writeed! " + path + "/" + number + c_word + ".nfo") - except IOError as e: - print("[-]Write Failed!") - print(e) - moveFailedFolder(filepath, failed_folder) - return - except Exception as e1: - print(e1) - print("[-]Write Failed!") - moveFailedFolder(filepath, failed_folder) - return - - -def cutImage(option, imagecut, path, number, c_word): - if option == 'plex': - if imagecut == 1: - try: - img = Image.open(path + '/fanart.jpg') - imgSize = img.size - w = img.width - h = img.height - img2 = img.crop((w / 1.9, 0, w, h)) - img2.save(path + '/poster.jpg') - except: - print('[-]Cover cut failed!') - elif imagecut == 0: - img = Image.open(path + '/fanart.jpg') - w = img.width - h = img.height - img.save(path + '/poster.jpg') - elif option == 'emby': - if imagecut == 1: - try: - img = Image.open(path + '/' + number + c_word + '.jpg') - imgSize = img.size - w = img.width - h = img.height - img2 = img.crop((w / 1.9, 0, w, h)) - img2.save(path + '/' + number + c_word + '.png') - except: - print('[-]Cover cut failed!') - elif imagecut == 0: - img = Image.open(path + '/' + number + c_word + '.jpg') - w = img.width - h = img.height - img.save(path + '/' + number + c_word + '.png') - elif option == 'kodi': - if imagecut == 1: - try: - img = Image.open(path + '/' + number + c_word + '-fanart.jpg') - imgSize = img.size - w = img.width - h = img.height - img2 = img.crop((w / 1.9, 0, w, h)) - img2.save(path + '/' + number + c_word + '-poster.jpg') - except: - print('[-]Cover cut failed!') - elif imagecut == 0: - img = Image.open(path + '/' + number + c_word + '-fanart.jpg') - w = img.width - h = img.height - try: - img = img.convert('RGB') - img.save(path + '/' + number + c_word + '-poster.jpg') - except: - img = img.convert('RGB') - img.save(path + '/' + number + c_word + '-poster.jpg') - - -def pasteFileToFolder(filepath, path, number, c_word): # 文件路径,番号,后缀,要移动至的位置 - houzhui = str(re.search('[.](AVI|RMVB|WMV|MOV|MP4|MKV|FLV|TS|WEBM|avi|rmvb|wmv|mov|mp4|mkv|flv|ts|webm)$', filepath).group()) - try: - if config['common']['soft_link'] == '1': # 如果soft_link=1 使用软链接 - os.symlink(filepath, path + '/' + number + c_word + 
houzhui) - else: - os.rename(filepath, path + '/' + number + c_word + houzhui) - if os.path.exists(os.getcwd() + '/' + number + c_word + '.srt'): # 字幕移动 - os.rename(os.getcwd() + '/' + number + c_word + '.srt', path + '/' + number + c_word + '.srt') - print('[+]Sub moved!') - elif os.path.exists(os.getcwd() + '/' + number + c_word + '.ssa'): - os.rename(os.getcwd() + '/' + number + c_word + '.ssa', path + '/' + number + c_word + '.ssa') - print('[+]Sub moved!') - elif os.path.exists(os.getcwd() + '/' + number + c_word + '.sub'): - os.rename(os.getcwd() + '/' + number + c_word + '.sub', path + '/' + number + c_word + '.sub') - print('[+]Sub moved!') - except FileExistsError: - print('[-]File Exists! Please check your movie!') - print('[-]move to the root folder of the program.') - return - except PermissionError: - print('[-]Error! Please run as administrator!') - return - - -def pasteFileToFolder_mode2(filepath, path, multi_part, number, part, c_word): # 文件路径,番号,后缀,要移动至的位置 - if multi_part == 1: - number += part # 这时number会被附加上CD1后缀 - houzhui = str(re.search('[.](AVI|RMVB|WMV|MOV|MP4|MKV|FLV|TS|WEBM|avi|rmvb|wmv|mov|mp4|mkv|flv|ts|webm)$', filepath).group()) - try: - if config['common']['soft_link'] == '1': - os.symlink(filepath, path + '/' + number + part + c_word + houzhui) - else: - os.rename(filepath, path + '/' + number + part + c_word + houzhui) - if os.path.exists(number + '.srt'): # 字幕移动 - os.rename(number + part + c_word + '.srt', path + '/' + number + part + c_word + '.srt') - print('[+]Sub moved!') - elif os.path.exists(number + part + c_word + '.ass'): - os.rename(number + part + c_word + '.ass', path + '/' + number + part + c_word + '.ass') - print('[+]Sub moved!') - elif os.path.exists(number + part + c_word + '.sub'): - os.rename(number + part + c_word + '.sub', path + '/' + number + part + c_word + '.sub') - print('[+]Sub moved!') - print('[!]Success') - except FileExistsError: - print('[-]File Exists! Please check your movie!') - print('[-]move to the root folder of the program.') - return - except PermissionError: - print('[-]Error! 
Please run as administrator!') - return - - -def copyRenameJpgToBackdrop(option, path, number, c_word): - if option == 'plex': - shutil.copy(path + '/fanart.jpg', path + '/Backdrop.jpg') - shutil.copy(path + '/poster.jpg', path + '/thumb.png') - if option == 'emby': - shutil.copy(path + '/' + number + c_word + '.jpg', path + '/Backdrop.jpg') - if option == 'kodi': - shutil.copy(path + '/' + number + c_word + '-fanart.jpg', path + '/Backdrop.jpg') - - -def get_part(filepath, failed_folder): - try: - if re.search('-CD\d+', filepath): - return re.findall('-CD\d+', filepath)[0] - if re.search('-cd\d+', filepath): - return re.findall('-cd\d+', filepath)[0] - except: - print("[-]failed!Please rename the filename again!") - moveFailedFolder(filepath, failed_folder) - return - - -def debug_mode(json_data): - try: - if config['debug_mode']['switch'] == '1': - print('[+] ---Debug info---') - for i, v in json_data.items(): - if i == 'outline': - print('[+] -', i, ' :', len(v), 'characters') - continue - if i == 'actor_photo' or i == 'year': - continue - print('[+] -', "%-11s" % i, ':', v) - print('[+] ---Debug info---') - except: - aaa = '' - - -def core_main(file_path, number_th): - # =======================================================================初始化所需变量 - multi_part = 0 - part = '' - c_word = '' - option = '' - cn_sub = '' - config_file = 'config.ini' - Config = ConfigParser() - Config.read(config_file, encoding='UTF-8') - try: - option = ReadMediaWarehouse() - except: - print('[-]Config media_warehouse read failed!') - program_mode = Config['common']['main_mode'] # 运行模式 - failed_folder = Config['common']['failed_output_folder'] # 失败输出目录 - success_folder = Config['common']['success_output_folder'] # 成功输出目录 - filepath = file_path # 影片的路径 - number = number_th - json_data = getDataFromJSON(number, filepath, failed_folder) # 定义番号 - if json_data["number"] != number: - # fix issue #119 - # the root cause is we normalize the search id - # PrintFiles() will use the normalized id from website, - # but pasteFileToFolder() still use the input raw search id - # so the solution is: use the normalized search id - number = json_data["number"] - imagecut = json_data['imagecut'] - tag = json_data['tag'] - # =======================================================================判断-C,-CD后缀 - if '-CD' in filepath or '-cd' in filepath: - multi_part = 1 - part = get_part(filepath, failed_folder) - if '-c.' in filepath or '-C.' 
in filepath or '中文' in filepath or '字幕' in filepath: - cn_sub = '1' - c_word = '-C' # 中文字幕影片后缀 - - CreatFailedFolder(failed_folder) # 创建输出失败目录 - debug_mode(json_data) # 调试模式检测 - path = creatFolder(success_folder, json_data['location_rule'], json_data, Config) # 创建文件夹 - # =======================================================================刮削模式 - if program_mode == '1': - if multi_part == 1: - number += part # 这时number会被附加上CD1后缀 - smallCoverCheck(path, number, imagecut, json_data['cover_small'], c_word, option, Config, filepath, failed_folder) # 检查小封面 - imageDownload(option, json_data['cover'], number, c_word, path, multi_part, Config, filepath, failed_folder) # creatFoder会返回番号路径 - cutImage(option, imagecut, path, number, c_word) # 裁剪图 - copyRenameJpgToBackdrop(option, path, number, c_word) - PrintFiles(option, path, c_word, json_data['naming_rule'], part, cn_sub, json_data, filepath, failed_folder, tag) # 打印文件 - pasteFileToFolder(filepath, path, number, c_word) # 移动文件 - # =======================================================================整理模式 - elif program_mode == '2': - pasteFileToFolder_mode2(filepath, path, multi_part, number, part, c_word) # 移动文件 +# -*- coding: utf-8 -*- + +import os.path +import shutil +from PIL import Image +import json +from ADC_function import * +from MediaServer import * +from AV_Data_Capture import config +import lazyxml +# =========website======== +from SiteSource import avsox, javdb, fc2fans_club, javbus, fanza, mgstage +import requests +from enum import Enum, auto + + +# =====================本地文件处理=========================== + +def escapePath(path, escapeLiterals): # Remove escape literals + # escapeLiterals = Config['escape']['literals'] + backslash = '\\' + for literal in escapeLiterals: + path = path.replace(backslash + literal, '') + return path + + +def moveFailedFolder(filepath, failed_folder): + if failed_folder.strip() == '': + print('[+]Failed output folder is Empty') + else: + print('[-]Move to Failed output folder') + shutil.move(filepath, failed_folder) + return + + +def CreatFailedFolder(failed_folder): + if not os.path.exists(failed_folder + '/'): # 新建failed文件夹 + try: + os.makedirs(failed_folder + '/') + except: + print("[-]failed!can not be make Failed output folder\n[-](Please run as Administrator)") + return + + # 根据番号获取字典数据 + + +class SiteSource(Enum): + AVSOX = auto() + FC2 = auto() + FANZA = auto() + JAVDB = auto() + JAVBUS = auto() + MGSTAGE = auto() + + +def getDataFromJSON(file_number): # 从JSON返回元数据 + """ + iterate through all services and fetch the data + """ + + func_mapping = { + "avsox": avsox.main, + "fc2": fc2fans_club.main, + "fanza": fanza.main, + "javdb": javdb.main, + "javbus": javbus.main, + "mgstage": mgstage.main, + } + + # default fetch order list, from the begining to the end + sources = ["javbus", "javdb", "fanza", "mgstage", "fc2", "avsox"] + + # if the input file name matches centain rules, + # move some web service to the begining of the list + if re.match(r"^\d{5,}", file_number) or re.match(r'heyzo', file_number, re.IGNORECASE): + sources.insert(0, sources.pop(sources.index("avsox"))) + elif re.match(r"\d+\D+", file_number) or re.match(r'siro', file_number, re.IGNORECASE): + sources.insert(0, sources.pop(sources.index("mgstage"))) + sources.insert(0, sources.pop(sources.index("fanza"))) + elif re.match(r'fc2', file_number, re.IGNORECASE): + sources.insert(0, sources.pop(sources.index("fc2"))) + + for source in sources: + json_data = json.loads(func_mapping[source](file_number)) + # if any service return a 
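
One behavioural change hidden in this rewrite of the source ordering: the old
checks were substring tests ("HEYZO" in file_number), but re.match anchors at
the start of the string, so re.match(r'heyzo', file_number, re.IGNORECASE)
misses names like "abc-HEYZO-123". re.search is the drop-in way to keep the
match-anywhere semantics:

    if re.match(r"^\d{5,}", file_number) or re.search(r"heyzo", file_number, re.IGNORECASE):
        sources.insert(0, sources.pop(sources.index("avsox")))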
valid return, break + if getDataState(json_data) != 0: + break + + # ================================================网站规则添加结束================================================ + + title = json_data['title'] + actor_list = str(json_data['actor']).strip("[ ]").replace("'", '').split(',') # 字符串转列表 + release = json_data['release'] + number = json_data['number'] + studio = json_data['studio'] + source = json_data['source'] + runtime = json_data['runtime'] + outline = json_data['runtime'] + label = json_data['label'] + year = json_data['year'] + try: + cover_small = json_data['cover_small'] + except: + cover_small = '' + + imagecut = json_data['imagecut'] + tag = str(json_data['tag']).strip("[ ]").replace("'", '').replace(" ", '').split(',') # 字符串转列表 @ + actor = str(actor_list).strip("[ ]").replace("'", '').replace(" ", '') + + if title == '' or number == '': + raise Exception('[-]Movie Data not found!') + + # if imagecut == '3': + # DownloadFileWithFilename() + + # ====================处理异常字符====================== #\/:*?"<>| + title = re.sub(r'[#\\/:*?"<>|\]]', '', title, 0, re.IGNORECASE) + release = release.replace('/', '-') + tmpArr = cover_small.split(',') + if len(tmpArr) > 0: + cover_small = tmpArr[0].strip('\"').strip('\'') + # ====================处理异常字符 END================== #\/:*?"<>| + + naming_rule = eval(config.naming_rule) + location_rule = eval(config.location_rule) + + # 返回处理后的json_data + json_data['title'] = title + json_data['actor'] = actor + json_data['release'] = release + json_data['cover_small'] = cover_small + json_data['tag'] = tag + json_data['naming_rule'] = naming_rule + json_data['location_rule'] = location_rule + json_data['year'] = year + return json_data + + +def get_info(json_data): # 返回json里的数据 + title = json_data['title'] + studio = json_data['studio'] + year = json_data['year'] + outline = json_data['outline'] + runtime = json_data['runtime'] + director = json_data['director'] + actor_photo = json_data['actor_photo'] + release = json_data['release'] + number = json_data['number'] + cover = json_data['cover'] + website = json_data['website'] + return title, studio, year, outline, runtime, director, actor_photo, release, number, cover, website + + +def download_cover_file(url, name, folder_path): + """ + download small cover + :param url: url + :param name: name same as movie's name without ext + :param folder_path: dir to save + :return: + """ + filename = config.media_server.poster_name(name) + DownloadFileWithFilename(url, filename, folder_path) + + +def smallCoverCheck(path, number, imagecut, cover_small, c_word, option, filepath, failed_folder): + if imagecut == 3: + if option == 'emby': + DownloadFileWithFilename(cover_small, '1.jpg', path, filepath, failed_folder) + try: + img = Image.open(path + '/1.jpg') + except Exception: + img = Image.open('1.jpg') + w = img.width + h = img.height + img.save(path + '/' + number + c_word + '.png') + time.sleep(1) + os.remove(path + '/1.jpg') + if option == 'kodi': + DownloadFileWithFilename(cover_small, '1.jpg', path, filepath, failed_folder) + try: + img = Image.open(path + '/1.jpg') + except Exception: + img = Image.open('1.jpg') + w = img.width + h = img.height + img.save(path + '/' + number + c_word + '-poster.jpg') + time.sleep(1) + os.remove(path + '/1.jpg') + if option == 'plex': + DownloadFileWithFilename(cover_small, '1.jpg', path, filepath, failed_folder) + try: + img = Image.open(path + '/1.jpg') + except Exception: + img = Image.open('1.jpg') + w = img.width + h = img.height + img.save(path + '/poster.jpg') + 
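
Two slips in the block above are worth fixing. The outline is copied from the
wrong key (outline = json_data['runtime']), so the plot text silently becomes
the runtime; it should read:

    outline = json_data['outline']

And the rewritten DownloadFileWithFilename now takes three parameters
(url, filename, path), while smallCoverCheck still passes the old five — a
TypeError at runtime. Those calls should shrink to:

    DownloadFileWithFilename(cover_small, '1.jpg', path)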
os.remove(path + '/1.jpg') + + +def creatFolder(success_folder, location_rule, json_data, escapeLiterals): # 创建文件夹 + title, studio, year, outline, runtime, director, actor_photo, release, number, cover, website = get_info(json_data) + if len(location_rule) > 240: # 新建成功输出文件夹 + path = success_folder + '/' + location_rule.replace("'actor'", "'manypeople'", 3).replace("actor", + "'manypeople'", + 3) # path为影片+元数据所在目录 + else: + path = success_folder + '/' + location_rule + # print(path) + if not os.path.exists(path): + path = escapePath(path, escapeLiterals) + try: + os.makedirs(path) + except: + path = success_folder + '/' + location_rule.replace('/[' + number + ']-' + title, "/number") + path = escapePath(path, escapeLiterals) + os.makedirs(path) + return path + + +# =====================资源下载部分=========================== +def download_file(url, folder, name_with_ext): + """ + download file + :param url: source url + :param name_with_ext: full name like 'mike.jpg' + :param folder: folder path + :return: full path if downloaded file like '/Users/proj/AV_Data_Capture/mike.jpg' + """ + proxy_dict = {"http": str(config.proxy), "https": str(config.proxy)} if config.proxy else None + i = 0 + while i < config.retry: + try: + if not os.path.exists(folder): + os.makedirs(folder) + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'} + r = requests.get(url, headers=headers, timeout=config.timeout, proxies=proxy_dict) + if r == '': + print('[-]Movie Data not found!') + return + with open(str(folder) + "/" + name_with_ext, "wb") as code: + code.write(r.content) + return str(folder) + "/" + name_with_ext + except requests.exceptions.RequestException: + i += 1 + print('[-]Image Download : Connect retry ' + str(i) + '/' + str(config.retry)) + except requests.exceptions.ConnectionError: + i += 1 + print('[-]Image Download : Connect retry ' + str(i) + '/' + str(config.retry)) + except requests.exceptions.ProxyError: + i += 1 + print('[-]Image Download : Connect retry ' + str(i) + '/' + str(config.retry)) + except requests.exceptions.ConnectTimeout: + i += 1 + print('[-]Image Download : Connect retry ' + str(i) + '/' + str(config.retry)) + + +def DownloadFileWithFilename(url, filename, path): # path = examle:photo , video.in the Project Folder! + proxy, timeout, retry_count = get_network_settings() + i = 0 + proxy_dict = {"http": str(config.proxy), "https": str(config.proxy)} if proxy else None + while i < retry_count: + try: + if not os.path.exists(path): + os.makedirs(path) + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'} + r = requests.get(url, headers=headers, timeout=timeout, + proxies=proxy_dict) + if r == '': + print('[-]Movie Data not found!') + return + with open(str(path) + "/" + filename, "wb") as code: + code.write(r.content) + return + except requests.exceptions.RequestException: + i += 1 + print('[-]Image Download : Connect retry ' + str(i) + '/' + str(retry_count)) + except requests.exceptions.ConnectionError: + i += 1 + print('[-]Image Download : Connect retry ' + str(i) + '/' + str(retry_count)) + except requests.exceptions.ProxyError: + i += 1 + print('[-]Image Download : Connect retry ' + str(i) + '/' + str(retry_count)) + except requests.exceptions.ConnectTimeout: + i += 1 + print('[-]Image Download : Connect retry ' + str(i) + '/' + str(retry_count)) + print('[-]Connect Failed! 
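
requests.get returns a Response object, never '', so the `if r == '':` guards
in download_file and DownloadFileWithFilename above can never fire. The
response itself reports failure — a sketch of a guard that does:

    r = requests.get(url, headers=headers, timeout=config.timeout, proxies=proxy_dict)
    if not r.ok:  # any 4xx/5xx status
        print('[-]Movie Data not found!')
        return None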
Please check your Proxy or Network!') + # moveFailedFolder(filepath, failed_folder) + return + + +def download_image(url, name, folder): + """ + download img + :param url: source + :param name: name + :param folder: folder to save + :return: + """ + name_with_ext = config.media_server.image_name(name) + download_file(url, folder, name_with_ext) + + +def imageDownload(option, cover, number, c_word, path, multi_part, filepath, failed_folder): # 封面是否下载成功,否则移动到failed + if option == 'emby': # name.jpg + if DownloadFileWithFilename(cover, number + c_word + '.jpg', path, filepath, failed_folder) == 'failed': + moveFailedFolder(filepath, failed_folder) + return + DownloadFileWithFilename(cover, number + c_word + '.jpg', path, filepath, failed_folder) + if not os.path.getsize(path + '/' + number + c_word + '.jpg') == 0: + print('[+]Image Downloaded!', path + '/' + number + c_word + '.jpg') + return + i = 1 + while i <= int(config.retry): + if os.path.getsize(path + '/' + number + c_word + '.jpg') == 0: + print('[!]Image Download Failed! Trying again. [' + config.retry + '/3]') + DownloadFileWithFilename(cover, number + c_word + '.jpg', path, filepath, failed_folder) + i = i + 1 + continue + else: + break + if multi_part == 1: + old_name = os.path.join(path, number + c_word + '.jpg') + new_name = os.path.join(path, number + c_word + '.jpg') + os.rename(old_name, new_name) + print('[+]Image Downloaded!', path + '/' + number + c_word + '.jpg') + else: + print('[+]Image Downloaded!', path + '/' + number + c_word + '.jpg') + elif option == 'plex': # fanart.jpg + if DownloadFileWithFilename(cover, 'fanart.jpg', path, filepath, failed_folder) == 'failed': + moveFailedFolder(filepath, failed_folder) + return + DownloadFileWithFilename(cover, 'fanart.jpg', path, filepath, failed_folder) + if not os.path.getsize(path + '/fanart.jpg') == 0: + print('[+]Image Downloaded!', path + '/fanart.jpg') + return + i = 1 + while i <= int(config.retry): + if os.path.getsize(path + '/fanart.jpg') == 0: + print('[!]Image Download Failed! Trying again. [' + config.retry + '/3]') + DownloadFileWithFilename(cover, 'fanart.jpg', path, filepath, failed_folder) + i = i + 1 + continue + else: + break + if not os.path.getsize(path + '/' + number + c_word + '.jpg') == 0: + print('[!]Image Download Failed! Trying again.') + DownloadFileWithFilename(cover, number + c_word + '.jpg', path, filepath, failed_folder) + print('[+]Image Downloaded!', path + '/fanart.jpg') + elif option == 'kodi': # [name]-fanart.jpg + if DownloadFileWithFilename(cover, number + c_word + '-fanart.jpg', path, filepath, failed_folder) == 'failed': + moveFailedFolder(filepath, failed_folder) + return + DownloadFileWithFilename(cover, number + c_word + '-fanart.jpg', path, filepath, failed_folder) + if not os.path.getsize(path + '/' + number + c_word + '-fanart.jpg') == 0: + print('[+]Image Downloaded!', path + '/' + number + c_word + '-fanart.jpg') + return + i = 1 + while i <= int(config.retry): + if os.path.getsize(path + '/' + number + c_word + '-fanart.jpg') == 0: + print('[!]Image Download Failed! Trying again. 
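
Two details in imageDownload above: the retry banner always prints the
configured maximum (config.retry) rather than the current attempt, and breaks
with a TypeError if retry is ever parsed as an int; and old_name/new_name in
the multi-part branch are built from the same expression, so that os.rename is
a no-op. A progress line that tracks the loop counter:

    print('[!]Image Download Failed! Trying again. [%d/%d]' % (i, int(config.retry)))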
[' + config.retry + '/3]') + DownloadFileWithFilename(cover, number + c_word + '-fanart.jpg', path, filepath, failed_folder) + i = i + 1 + continue + else: + break + print('[+]Image Downloaded!', path + '/' + number + c_word + '-fanart.jpg') + + +def make_nfo_file(nfo, nfo_name, folder_path): + """ + make xxx.nfo in folder + :param nfo_name: name + :param nfo: nfo dict + :param folder_path: where to create file, default temp_folder + :return: + """ + title, studio, year, outline, runtime, director, actor_photo, release, number, cover, website = get_info(nfo) + naming_rule = nfo['naming_rule'] + tag = nfo['tag'] + + path = folder_path + c_word = '' + cn_sub = '' + part = '' + # path_file = path + "/" + number + c_word + ".nfo", "wt" + path_file = path + "/" + nfo_name + c_word + ".nfo" + lazyxml.dump + try: + if not os.path.exists(path): + os.makedirs(path) + if config.media_server == MediaServer.PLEX: + with open(path_file, "wt", encoding='UTF-8') as code: + print('', file=code) + print("", file=code) + print(" " + naming_rule + part + "", file=code) + print(" ", file=code) + print(" ", file=code) + print(" " + studio + "+", file=code) + print(" " + year + "", file=code) + print(" " + outline + "", file=code) + print(" " + outline + "", file=code) + print(" " + str(runtime).replace(" ", "") + "", file=code) + print(" " + director + "", file=code) + print(" poster.jpg", file=code) + print(" thumb.png", file=code) + print(" fanart.jpg", file=code) + try: + for key, value in actor_photo.items(): + print(" ", file=code) + print(" " + key + "", file=code) + if not value == '': # or actor_photo == []: + print(" " + value + "", file=code) + print(" ", file=code) + except: + aaaa = '' + print(" " + studio + "", file=code) + print(" ", file=code) + if cn_sub == '1': + print(" 中文字幕", file=code) + try: + for i in str(tag).strip("[ ]").replace("'", '').replace(" ", '').split(','): + print(" " + i + "", file=code) + except: + aaaaa = '' + try: + for i in str(tag).strip("[ ]").replace("'", '').replace(" ", '').split(','): + print(" " + i + "", file=code) + except: + aaaaaaaa = '' + if cn_sub == '1': + print(" 中文字幕", file=code) + print(" " + number + "", file=code) + print(" " + release + "", file=code) + print(" " + cover + "", file=code) + print(" " + website + "", file=code) + print("", file=code) + print("[+]Writeed! 
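
The bare `lazyxml.dump` near the top of make_nfo_file is an attribute lookup
with no call — it does nothing. If structured XML output is the goal, the
standard library suffices; a minimal sketch with xml.etree.ElementTree (element
names assumed from the fields being written):

    import xml.etree.ElementTree as ET

    movie = ET.Element('movie')
    ET.SubElement(movie, 'title').text = naming_rule + part
    ET.SubElement(movie, 'year').text = year
    ET.SubElement(movie, 'plot').text = outline
    ET.SubElement(movie, 'num').text = number
    ET.ElementTree(movie).write(path_file, encoding='UTF-8', xml_declaration=True)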
" + path + "/" + number + ".nfo") + elif config.media_server == MediaServer.EMBY: + with open(path_file, "wt", encoding='UTF-8') as code: + print('', file=code) + print("", file=code) + print(" " + naming_rule + part + "", file=code) + print(" ", file=code) + print(" ", file=code) + print(" " + studio + "+", file=code) + print(" " + year + "", file=code) + print(" " + outline + "", file=code) + print(" " + outline + "", file=code) + print(" " + str(runtime).replace(" ", "") + "", file=code) + print(" " + director + "", file=code) + print(" " + number + c_word + ".png", file=code) + print(" " + number + c_word + ".png", file=code) + print(" " + number + c_word + '.jpg' + "", file=code) + try: + for key, value in actor_photo.items(): + print(" ", file=code) + print(" " + key + "", file=code) + if not value == '': # or actor_photo == []: + print(" " + value + "", file=code) + print(" ", file=code) + except: + aaaa = '' + print(" " + studio + "", file=code) + print(" ", file=code) + if cn_sub == '1': + print(" 中文字幕", file=code) + try: + for i in tag: + print(" " + i + "", file=code) + except: + aaaaa = '' + try: + for i in tag: + print(" " + i + "", file=code) + except: + aaaaaaaa = '' + if cn_sub == '1': + print(" 中文字幕", file=code) + print(" " + number + "", file=code) + print(" " + release + "", file=code) + print(" " + cover + "", file=code) + print(" " + website + "", file=code) + print("", file=code) + print("[+]Writeed! " + path + "/" + number + c_word + ".nfo") + elif config.media_server == MediaServer.KODI: + with open(path_file, "wt", encoding='UTF-8') as code: + print('', file=code) + print("", file=code) + print(" " + naming_rule + part + "", file=code) + print(" ", file=code) + print(" ", file=code) + print(" " + studio + "+", file=code) + print(" " + year + "", file=code) + print(" " + outline + "", file=code) + print(" " + outline + "", file=code) + print(" " + str(runtime).replace(" ", "") + "", file=code) + print(" " + director + "", file=code) + print(" " + number + c_word + "-poster.jpg", file=code) + print(" " + number + c_word + '-fanart.jpg' + "", file=code) + try: + for key, value in actor_photo.items(): + print(" ", file=code) + print(" " + key + "", file=code) + if not value == '': # or actor_photo == []: + print(" " + value + "", file=code) + print(" ", file=code) + except: + aaaa = '' + print(" " + studio + "", file=code) + print(" ", file=code) + if cn_sub == '1': + print(" 中文字幕", file=code) + try: + for i in tag: + print(" " + i + "", file=code) + except: + aaaaa = '' + try: + for i in tag: + print(" " + i + "", file=code) + except: + aaaaaaaa = '' + if cn_sub == '1': + print(" 中文字幕", file=code) + print(" " + number + "", file=code) + print(" " + release + "", file=code) + print(" " + cover + "", file=code) + print(" " + website + "", file=code) + print("", file=code) + print("[+]Writeed! " + path + "/" + number + c_word + ".nfo") + except IOError as e: + print("[-]Write Failed! :" + e) + # print(e) + # moveFailedFolder(filepath, failed_folder) + return + except Exception as e: + print("[-]Write Failed! 
:" + e) + # moveFailedFolder(filepath, failed_folder) + return + + +def PrintFiles(option, path, c_word, naming_rule, part, cn_sub, json_data, filepath, failed_folder, tag): + title, studio, year, outline, runtime, director, actor_photo, release, number, cover, website = get_info(json_data) + try: + if not os.path.exists(path): + os.makedirs(path) + if option == 'plex': + with open(path + "/" + number + c_word + ".nfo", "wt", encoding='UTF-8') as code: + print('', file=code) + print("", file=code) + print(" " + naming_rule + part + "", file=code) + print(" ", file=code) + print(" ", file=code) + print(" " + studio + "+", file=code) + print(" " + year + "", file=code) + print(" " + outline + "", file=code) + print(" " + outline + "", file=code) + print(" " + str(runtime).replace(" ", "") + "", file=code) + print(" " + director + "", file=code) + print(" poster.jpg", file=code) + print(" thumb.png", file=code) + print(" fanart.jpg", file=code) + try: + for key, value in actor_photo.items(): + print(" ", file=code) + print(" " + key + "", file=code) + if not value == '': # or actor_photo == []: + print(" " + value + "", file=code) + print(" ", file=code) + except: + aaaa = '' + print(" " + studio + "", file=code) + print(" ", file=code) + if cn_sub == '1': + print(" 中文字幕", file=code) + try: + for i in str(json_data['tag']).strip("[ ]").replace("'", '').replace(" ", '').split(','): + print(" " + i + "", file=code) + except: + aaaaa = '' + try: + for i in str(json_data['tag']).strip("[ ]").replace("'", '').replace(" ", '').split(','): + print(" " + i + "", file=code) + except: + aaaaaaaa = '' + if cn_sub == '1': + print(" 中文字幕", file=code) + print(" " + number + "", file=code) + print(" " + release + "", file=code) + print(" " + cover + "", file=code) + print(" " + website + "", file=code) + print("", file=code) + print("[+]Writeed! " + path + "/" + number + ".nfo") + elif option == 'emby': + with open(path + "/" + number + c_word + ".nfo", "wt", encoding='UTF-8') as code: + print('', file=code) + print("", file=code) + print(" " + naming_rule + part + "", file=code) + print(" ", file=code) + print(" ", file=code) + print(" " + studio + "+", file=code) + print(" " + year + "", file=code) + print(" " + outline + "", file=code) + print(" " + outline + "", file=code) + print(" " + str(runtime).replace(" ", "") + "", file=code) + print(" " + director + "", file=code) + print(" " + number + c_word + ".png", file=code) + print(" " + number + c_word + ".png", file=code) + print(" " + number + c_word + '.jpg' + "", file=code) + try: + for key, value in actor_photo.items(): + print(" ", file=code) + print(" " + key + "", file=code) + if not value == '': # or actor_photo == []: + print(" " + value + "", file=code) + print(" ", file=code) + except: + aaaa = '' + print(" " + studio + "", file=code) + print(" ", file=code) + if cn_sub == '1': + print(" 中文字幕", file=code) + try: + for i in tag: + print(" " + i + "", file=code) + except: + aaaaa = '' + try: + for i in tag: + print(" " + i + "", file=code) + except: + aaaaaaaa = '' + if cn_sub == '1': + print(" 中文字幕", file=code) + print(" " + number + "", file=code) + print(" " + release + "", file=code) + print(" " + cover + "", file=code) + print(" " + website + "", file=code) + print("", file=code) + print("[+]Writeed! 
" + path + "/" + number + c_word + ".nfo") + elif option == 'kodi': + with open(path + "/" + number + c_word + ".nfo", "wt", encoding='UTF-8') as code: + print('', file=code) + print("", file=code) + print(" " + naming_rule + part + "", file=code) + print(" ", file=code) + print(" ", file=code) + print(" " + studio + "+", file=code) + print(" " + year + "", file=code) + print(" " + outline + "", file=code) + print(" " + outline + "", file=code) + print(" " + str(runtime).replace(" ", "") + "", file=code) + print(" " + director + "", file=code) + print(" " + number + c_word + "-poster.jpg", file=code) + print(" " + number + c_word + '-fanart.jpg' + "", file=code) + try: + for key, value in actor_photo.items(): + print(" ", file=code) + print(" " + key + "", file=code) + if not value == '': # or actor_photo == []: + print(" " + value + "", file=code) + print(" ", file=code) + except: + aaaa = '' + print(" " + studio + "", file=code) + print(" ", file=code) + if cn_sub == '1': + print(" 中文字幕", file=code) + try: + for i in tag: + print(" " + i + "", file=code) + except: + aaaaa = '' + try: + for i in tag: + print(" " + i + "", file=code) + except: + aaaaaaaa = '' + if cn_sub == '1': + print(" 中文字幕", file=code) + print(" " + number + "", file=code) + print(" " + release + "", file=code) + print(" " + cover + "", file=code) + print(" " + website + "", file=code) + print("", file=code) + print("[+]Writeed! " + path + "/" + number + c_word + ".nfo") + except IOError as e: + print("[-]Write Failed!") + print(e) + moveFailedFolder(filepath, failed_folder) + return + except Exception as e1: + print(e1) + print("[-]Write Failed!") + moveFailedFolder(filepath, failed_folder) + return + + +def crop_image(crop_style, name, path): + try: + origin_image = Image.open(path + '/' + config.media_server.image_name(name)) + if crop_style == 1: + cropped_image = origin_image.crop((origin_image.width / 1.9, 0, origin_image.width, origin_image.height)) + else: + cropped_image = origin_image + cropped_image.save(path + '/' + config.media_server.poster_name(name)) + + except Exception as e: + print('[-]Cover cut failed:' + e) + + +def cutImage(option, imagecut, path, number, c_word): + if option == 'plex': + if imagecut == 1: # 截取右侧封面 fanart.jpg 截取为poster.jpg + try: + img = Image.open(path + '/fanart.jpg') + imgSize = img.size + w = img.width + h = img.height + img2 = img.crop((w / 1.9, 0, w, h)) + img2.save(path + '/poster.jpg') + except: + print('[-]Cover cut failed!') + elif imagecut == 0: # 改名 fanart.jpg ->poster.jpg + img = Image.open(path + '/fanart.jpg') + w = img.width + h = img.height + img.save(path + '/poster.jpg') + elif option == 'emby': + if imagecut == 1: # 截取右侧封面 [name].jpg 截取为 [name].jpg + try: + img = Image.open(path + '/' + number + c_word + '.jpg') + imgSize = img.size + w = img.width + h = img.height + img2 = img.crop((w / 1.9, 0, w, h)) + img2.save(path + '/' + number + c_word + '.png') + except: + print('[-]Cover cut failed!') + elif imagecut == 0: # [name].jpg -> [name].png + img = Image.open(path + '/' + number + c_word + '.jpg') + img.save(path + '/' + number + c_word + '.png') + elif option == 'kodi': + if imagecut == 1: # 截取右侧封面 [name]-fanart.jpg 截取为 [name]-poster.jpg + try: + img = Image.open(path + '/' + number + c_word + '-fanart.jpg') + w = img.width + h = img.height + img2 = img.crop((w / 1.9, 0, w, h)) + img2.save(path + '/' + number + c_word + '-poster.jpg') + except: + print('[-]Cover cut failed!') + elif imagecut == 0: # [name]-fanart.jpg 截取为 [name]-poster.jpg + img = 
+
+
+def pasteFileToFolder(filepath, path, number, c_word):  # file path, ID number, suffix, destination folder
+    houzhui = str(re.search(r'[.](avi|rmvb|wmv|mov|mp4|mkv|flv|ts|webm)$', filepath, re.IGNORECASE).group())  # houzhui = file extension
+    try:
+        if config.soft_link == '1':  # if soft_link = 1, use a symbolic link
+            os.symlink(filepath, path + '/' + number + c_word + houzhui)
+        else:
+            os.rename(filepath, path + '/' + number + c_word + houzhui)
+        if os.path.exists(config.search_folder + '/' + number + c_word + '.srt'):  # move subtitles
+            os.rename(config.search_folder + '/' + number + c_word + '.srt', path + '/' + number + c_word + '.srt')
+            print('[+]Sub moved!')
+        elif os.path.exists(config.search_folder + '/' + number + c_word + '.ssa'):
+            os.rename(config.search_folder + '/' + number + c_word + '.ssa', path + '/' + number + c_word + '.ssa')
+            print('[+]Sub moved!')
+        elif os.path.exists(config.search_folder + '/' + number + c_word + '.sub'):
+            os.rename(config.search_folder + '/' + number + c_word + '.sub', path + '/' + number + c_word + '.sub')
+            print('[+]Sub moved!')
+    except FileExistsError:
+        print('[-]File Exists! Please check your movie!')
+        print('[-]Move it to the root folder of the program.')
+        return
+    except PermissionError:
+        print('[-]Error! Please run as administrator!')
+        return
+
+
+def pasteFileToFolder_mode2(filepath, path, multi_part, number, part, c_word):  # file path, ID number, suffix, destination folder
+    if multi_part == 1:
+        number += part  # number now carries the CD1-style suffix
+    houzhui = str(re.search(r'[.](avi|rmvb|wmv|mov|mp4|mkv|flv|ts|webm)$', filepath, re.IGNORECASE).group())  # houzhui = file extension
+    try:
+        if config.soft_link == '1':
+            os.symlink(filepath, path + '/' + number + c_word + houzhui)
+        else:
+            os.rename(filepath, path + '/' + number + c_word + houzhui)
+        if os.path.exists(number + c_word + '.srt'):  # move subtitles
+            os.rename(number + c_word + '.srt', path + '/' + number + c_word + '.srt')
+            print('[+]Sub moved!')
+        elif os.path.exists(number + c_word + '.ass'):
+            os.rename(number + c_word + '.ass', path + '/' + number + c_word + '.ass')
+            print('[+]Sub moved!')
+        elif os.path.exists(number + c_word + '.sub'):
+            os.rename(number + c_word + '.sub', path + '/' + number + c_word + '.sub')
+            print('[+]Sub moved!')
+        print('[!]Success')
+    except FileExistsError:
+        print('[-]File Exists! Please check your movie!')
+        print('[-]Move it to the root folder of the program.')
+        return
+    except PermissionError:
+        print('[-]Error! Please run as administrator!')
+        return
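# Sketch of an alternative to the os.rename() calls above (assumption, not
# part of this diff): os.rename() raises OSError when source and destination
# sit on different filesystems, while shutil.move() falls back to
# copy-then-delete in that case.
import os
import shutil

def place_file(src, dst, use_symlink=False):  # illustrative helper name
    if use_symlink:
        os.symlink(src, dst)  # keep the original file, link it into the library
    else:
        shutil.move(src, dst)  # works across filesystems, unlike os.rename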
+
+
+def copy_images_to_background_image(name, path):
+    shutil.copy(path + "/" + config.media_server.image_name(name), path + "/Backdrop.jpg")
+    if config.media_server == MediaServer.PLEX:
+        shutil.copy(path + "/" + config.media_server.poster_name(name), path + '/thumb.png')
+
+
+def copyRenameJpgToBackdrop(option, path, number, c_word):
+    if option == 'plex':
+        shutil.copy(path + '/fanart.jpg', path + '/Backdrop.jpg')
+        shutil.copy(path + '/poster.jpg', path + '/thumb.png')
+    elif option == 'emby':
+        shutil.copy(path + '/' + number + c_word + '.jpg', path + '/Backdrop.jpg')
+    elif option == 'kodi':
+        shutil.copy(path + '/' + number + c_word + '-fanart.jpg', path + '/Backdrop.jpg')
+
+
+def get_part(filepath, failed_folder):
+    try:
+        if re.search(r'-CD\d+', filepath):
+            return re.findall(r'-CD\d+', filepath)[0]
+        if re.search(r'-cd\d+', filepath):
+            return re.findall(r'-cd\d+', filepath)[0]
+    except Exception:
+        print("[-]Failed! Please rename the file!")
+        moveFailedFolder(filepath, failed_folder)
+        return
+
+
+def debug_mode(json_data):
+    try:
+        if config.debug_mode == '1':
+            print('[+] ---Debug info---')
+            for i, v in json_data.items():
+                if i == 'outline':
+                    print('[+] -', i, ' :', len(v), 'characters')
+                    continue
+                if i == 'actor_photo' or i == 'year':
+                    continue
+                print('[+] -', "%-11s" % i, ':', v)
+            print('[+] ---Debug info---')
+    except Exception:
+        pass
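# A sketch showing how the two case-sensitive branches of get_part() above
# could collapse into one pattern (illustrative, not part of this diff):
import re

def get_part_sketch(filepath):  # illustrative helper name
    m = re.search(r'-CD\d+', filepath, re.IGNORECASE)
    return m.group() if m else None

# get_part_sketch('ABC-123-cd2.mp4')  ->  '-cd2'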
+
+
+def core_main(number_th):
+    # ======================================== initialize variables
+    multi_part = 0
+    part = ''
+    c_word = ''
+    option = ''
+    cn_sub = ''
+
+    # filepath = file_path  # path of the movie file
+    number = number_th
+
+    json_data = getDataFromJSON(number)  # look up metadata for this ID
+
+    # if json_data.get('number') != number:
+    # fix issue #119
+    # the root cause is we normalize the search id
+    # PrintFiles() will use the normalized id from website,
+    # but pasteFileToFolder() still use the input raw search id
+    # so the solution is: use the normalized search id
+    # number = json_data["number"]
+    # imagecut = json_data['imagecut']
+    # tag = json_data['tag']
+    # ======================================== detect -C / -CD suffixes
+    # if '-CD' in filepath or '-cd' in filepath:
+    #     multi_part = 1
+    #     part = get_part(filepath, config.failed_folder)
+    # if '-c.' in filepath or '-C.' in filepath or '中文' in filepath or '字幕' in filepath:
+    #     cn_sub = '1'
+    #     c_word = '-C'  # suffix for movies with Chinese subtitles
+
+    # CreatFailedFolder(config.failed_folder)  # create the folder for failed output
+    # debug_mode(json_data)  # debug-mode check
+    return json_data
+    # path = creatFolder(config.success_folder, json_data['location_rule'], json_data, config.escape_literals)  # create the folder
+    # ======================================== scraping mode
+    # if config.program_mode == '1':
+    #     if multi_part == 1:
+    #         number += part  # number now carries the CD1-style suffix
+    #     smallCoverCheck(path, number, imagecut, json_data['cover_small'], c_word, option, filepath, config.failed_folder)  # check the small cover
+    #     imageDownload(option, json_data['cover'], number, c_word, path, multi_part, filepath, config.failed_folder)  # creatFolder returns the path for this ID
+    #     cutImage(option, imagecut, path, number, c_word)  # crop the cover
+    #     copyRenameJpgToBackdrop(option, path, number, c_word)
+    #     PrintFiles(option, path, c_word, json_data['naming_rule'], part, cn_sub, json_data, filepath, config.failed_folder, tag)  # write the .nfo file
+    #     pasteFileToFolder(filepath, path, number, c_word)  # move the file
+    # # ======================================== organize mode
+    # elif config.program_mode == '2':
+    #     pasteFileToFolder_mode2(filepath, path, multi_part, number, part, c_word)  # move the file
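# The commented-out block in core_main() above detects the "-C"
# Chinese-subtitle marker; a compact sketch of the same conditions
# (illustrative, not part of this diff):
def has_cn_sub(filepath):
    return ('-c.' in filepath or '-C.' in filepath
            or '中文' in filepath or '字幕' in filepath)

# has_cn_sub('ABC-123-C.mp4')  ->  True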
") - - return { - "actor": get_anchor_info(h=data[0]), - "label": get_anchor_info(h=data[1]), - "tag": get_anchor_info(h=data[2]), - "number": get_text_info(h=data[3]), - "release": get_text_info(h=data[4]), - "runtime": get_text_info(h=data[5]), - } - - -def get_anchor_info(h: str) -> str: - result = [] - - data = BeautifulSoup(h, "html.parser").find_all("a", href=True) - for d in data: - result.append(d.text) - - return ",".join(result) - - -def get_text_info(h: str) -> str: - return h.split(": ")[1] - - -def get_cover(lx: html.HtmlElement) -> str: - return lx.xpath("/html/body/div[2]/div[2]/div[1]/p/a/img/@src")[0] - - -def get_outline(lx: html.HtmlElement) -> str: - return lx.xpath("/html/body/div[2]/div[1]/div[1]/div[2]/div[3]/div/text()")[0] - - -if __name__ == "__main__": - print(main("wmc-002")) diff --git a/readme/._readme1.PNG b/readme/._readme1.PNG new file mode 100755 index 0000000..0ea2059 Binary files /dev/null and b/readme/._readme1.PNG differ diff --git a/readme/._readme2.PNG b/readme/._readme2.PNG new file mode 100755 index 0000000..388f797 Binary files /dev/null and b/readme/._readme2.PNG differ diff --git a/readme/._readme4.PNG b/readme/._readme4.PNG new file mode 100755 index 0000000..eb24b60 Binary files /dev/null and b/readme/._readme4.PNG differ diff --git a/readme/This is readms.md's images folder b/readme/This is readms.md's images folder old mode 100644 new mode 100755 diff --git a/readme/flow_chart2.png b/readme/flow_chart2.png old mode 100644 new mode 100755 diff --git a/readme/readme1.PNG b/readme/readme1.PNG old mode 100644 new mode 100755 diff --git a/readme/readme2.PNG b/readme/readme2.PNG old mode 100644 new mode 100755 diff --git a/readme/readme3.PNG b/readme/readme3.PNG old mode 100644 new mode 100755 diff --git a/readme/readme4.PNG b/readme/readme4.PNG old mode 100644 new mode 100755 diff --git a/readme/single.gif b/readme/single.gif old mode 100644 new mode 100755 diff --git a/resource/This is readms.md's images folder b/resource/This is readms.md's images folder new file mode 100755 index 0000000..d00491f --- /dev/null +++ b/resource/This is readms.md's images folder @@ -0,0 +1 @@ +1 diff --git a/resource/flow_chart2.png b/resource/flow_chart2.png new file mode 100755 index 0000000..4daf728 Binary files /dev/null and b/resource/flow_chart2.png differ diff --git a/resource/readme1.PNG b/resource/readme1.PNG new file mode 100755 index 0000000..b3d0a21 Binary files /dev/null and b/resource/readme1.PNG differ diff --git a/resource/readme2.PNG b/resource/readme2.PNG new file mode 100755 index 0000000..f002931 Binary files /dev/null and b/resource/readme2.PNG differ diff --git a/resource/readme3.PNG b/resource/readme3.PNG new file mode 100755 index 0000000..81e05cd Binary files /dev/null and b/resource/readme3.PNG differ diff --git a/resource/readme4.PNG b/resource/readme4.PNG new file mode 100755 index 0000000..26a2cf4 Binary files /dev/null and b/resource/readme4.PNG differ diff --git a/resource/ruquirments.txt b/resource/ruquirments.txt new file mode 100755 index 0000000..97951df --- /dev/null +++ b/resource/ruquirments.txt @@ -0,0 +1 @@ +pipenv install -rlxml bs4 pillow pyquery \ No newline at end of file diff --git a/resource/single.gif b/resource/single.gif new file mode 100755 index 0000000..4b9c371 Binary files /dev/null and b/resource/single.gif differ diff --git a/ruquirments.txt b/ruquirments.txt deleted file mode 100644 index aa091a0..0000000 --- a/ruquirments.txt +++ /dev/null @@ -1,4 +0,0 @@ -lxml -bs4 -pillow -pyquery \ No newline at end of 
diff --git a/test.py b/test.py
new file mode 100755
index 0000000..5ebb19e
--- /dev/null
+++ b/test.py
@@ -0,0 +1,80 @@
+import os
+import re
+from itertools import groupby
+
+import fuckit
+import pandas as pd
+from tenacity import retry, stop_after_delay, wait_fixed
+
+
+def go():
+    a = [1, 2, 3, 4, 5, 6]
+    # [print(x) for x in a]
+    # [print(x) for x in a]
+    a1 = groupby(a, key=lambda k: (k / 2))
+    for i in a1:
+        print(i)
+    for i in a1:  # prints nothing: groupby returns a one-shot iterator
+        print(i)
+
+
+class TryDo:
+    """Iterator that calls func up to `times` times; stops after the first successful call."""
+
+    def __init__(self, func, times=3):
+        self.tries = times
+        self.func = func
+
+    def __iter__(self):
+        self.currentTry = 1
+        return self
+
+    def __next__(self):
+        if self.currentTry > self.tries:
+            raise StopIteration(False)
+        self.currentTry += 1
+        self.func()
+        raise StopIteration(True)
+
+    # def do(self):
+
+
+@retry(stop=stop_after_delay(3), wait=wait_fixed(2))  # gives up after 3 seconds despite the function name
+def stop_after_10_s():
+    print("Stopping after 10 seconds")
+    raise Exception
+
+
+# f = iter(TryDo(do_something, 5))
+# stop_after_10_s()
+def errorfunc():
+    raise Exception
+
+
+def okfunc():
+    print("ok")
+
+
+# with fuckit:
+#     errorfunc()
+#     okfunc()
+# re.match()
+
+r = re.search(r'(?<=999)-?((?P<alpha>([A-Z](?![A-Z])))|(?P<num>\d(?!\d)))', "IPTD-999-B-彼女の姉貴とイケナイ関係-RIO", re.I)
+print(r.groupdict())
+print(r.groupdict()['alpha'])
+print(r.group(2))
+
+line = "Cats are smarter than dogs"
+matchObj = re.search(r'(?<=a)(.*) are (.*?) .*', line, re.M | re.I)
+if matchObj:
+    print("matchObj.group() : ", matchObj.group())
+    print("matchObj.group(1) : ", matchObj.group(1))
+    print("matchObj.group(2) : ", matchObj.group(2))
+else:
+    print("No match!!")
+
+# print(r[-1])
+# print(newList)
diff --git a/update_check.json b/update_check.json
old mode 100644
new mode 100755
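# A correct-usage sketch of the tenacity experiment in test.py above
# (assumption, not part of this diff): bound the retries by attempt count
# rather than wall-clock time; tenacity raises RetryError once the
# attempts are exhausted.
from tenacity import retry, stop_after_attempt, wait_fixed

@retry(stop=stop_after_attempt(3), wait=wait_fixed(2))
def fetch_once():
    raise Exception("still failing")  # retried twice, then RetryError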