diff --git a/.gitignore b/.gitignore
index 894a44c..cdc48c0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
+*.DS_Store
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..e7e9d11
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,2 @@
+# Default ignored files
+/workspace.xml
diff --git a/.idea/AV_Data_Capture.iml b/.idea/AV_Data_Capture.iml
new file mode 100644
index 0000000..21f057a
--- /dev/null
+++ b/.idea/AV_Data_Capture.iml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>
\ No newline at end of file
diff --git a/.idea/dictionaries/tanpengsccd.xml b/.idea/dictionaries/tanpengsccd.xml
new file mode 100644
index 0000000..d7229c1
--- /dev/null
+++ b/.idea/dictionaries/tanpengsccd.xml
@@ -0,0 +1,19 @@
+<component name="ProjectDictionaryState">
+  <dictionary name="tanpengsccd">
+    <words>
+      <w>avsox</w>
+      <w>emby</w>
+      <w>fanart</w>
+      <w>fanza</w>
+      <w>javbus</w>
+      <w>javdb</w>
+      <w>jellyfin</w>
+      <w>khtml</w>
+      <w>kodi</w>
+      <w>mgstage</w>
+      <w>plex</w>
+      <w>pondo</w>
+      <w>rmvb</w>
+    </words>
+  </dictionary>
+</component>
\ No newline at end of file
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000..105ce2d
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..a4410bf
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,7 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..9337de9
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/AV_Data_Capture.iml" filepath="$PROJECT_DIR$/.idea/AV_Data_Capture.iml" />
+    </modules>
+  </component>
+</project>
\ No newline at end of file
diff --git a/.idea/other.xml b/.idea/other.xml
new file mode 100644
index 0000000..a708ec7
--- /dev/null
+++ b/.idea/other.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..94a25f7
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="$PROJECT_DIR$" vcs="Git" />
+  </component>
+</project>
\ No newline at end of file
diff --git a/ADC_function.py b/ADC_function.py
index 596a9ea..04708d6 100755
--- a/ADC_function.py
+++ b/ADC_function.py
@@ -1,136 +1,127 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-import requests
-from configparser import ConfigParser
-import os
-import re
-import time
-import sys
-from lxml import etree
-import sys
-import io
-# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
-# sys.setdefaultencoding('utf-8')
-
-config_file='config.ini'
-config = ConfigParser()
-
-if os.path.exists(config_file):
- try:
- config.read(config_file, encoding='UTF-8')
- except:
- print('[-]Config.ini read failed! Please use the offical file!')
-else:
- print('[+]config.ini: not found, creating...',end='')
- with open("config.ini", "wt", encoding='UTF-8') as code:
- print("[common]", file=code)
- print("main_mode = 1", file=code)
- print("failed_output_folder = failed", file=code)
- print("success_output_folder = JAV_output", file=code)
- print("", file=code)
- print("[proxy]",file=code)
- print("proxy=127.0.0.1:1081",file=code)
- print("timeout=10", file=code)
- print("retry=3", file=code)
- print("", file=code)
- print("[Name_Rule]", file=code)
- print("location_rule=actor+'/'+number",file=code)
- print("naming_rule=number+'-'+title",file=code)
- print("", file=code)
- print("[update]",file=code)
- print("update_check=1",file=code)
- print("", file=code)
- print("[media]", file=code)
- print("media_warehouse=emby", file=code)
- print("#emby plex kodi", file=code)
- print("", file=code)
- print("[escape]", file=code)
- print("literals=\\", file=code)
- print("", file=code)
- print("[movie_location]", file=code)
- print("path=", file=code)
- print("", file=code)
- print('.',end='')
- time.sleep(2)
- print('.')
- print('[+]config.ini: created!')
- print('[+]Please restart the program!')
- time.sleep(4)
- os._exit(0)
- try:
- config.read(config_file, encoding='UTF-8')
- except:
- print('[-]Config.ini read failed! Please use the offical file!')
-
-def get_network_settings():
- try:
- proxy = config["proxy"]["proxy"]
- timeout = int(config["proxy"]["timeout"])
- retry_count = int(config["proxy"]["retry"])
- assert timeout > 0
- assert retry_count > 0
- except:
- raise ValueError("[-]Proxy config error! Please check the config.")
- return proxy, timeout, retry_count
-
-def getDataState(json_data): # 元数据获取失败检测
- if json_data['title'] == '' or json_data['title'] == 'None' or json_data['title'] == 'null':
- return 0
- else:
- return 1
-
-def ReadMediaWarehouse():
- return config['media']['media_warehouse']
-
-def UpdateCheckSwitch():
- check=str(config['update']['update_check'])
- if check == '1':
- return '1'
- elif check == '0':
- return '0'
- elif check == '':
- return '0'
-
-def getXpathSingle(htmlcode,xpath):
- html = etree.fromstring(htmlcode, etree.HTMLParser())
- result1 = str(html.xpath(xpath)).strip(" ['']")
- return result1
-
-def get_html(url,cookies = None):#网页请求核心
- proxy, timeout, retry_count = get_network_settings()
- i = 0
- while i < retry_count:
- try:
- if not proxy == '':
- proxies = {"http": "http://" + proxy,"https": "https://" + proxy}
- headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3100.0 Safari/537.36'}
- getweb = requests.get(str(url), headers=headers, timeout=timeout,proxies=proxies, cookies=cookies)
- getweb.encoding = 'utf-8'
- return getweb.text
- else:
- headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'}
- getweb = requests.get(str(url), headers=headers, timeout=timeout, cookies=cookies)
- getweb.encoding = 'utf-8'
- return getweb.text
- except:
- i += 1
- print('[-]Connect retry '+str(i)+'/'+str(retry_count))
- print('[-]Connect Failed! Please check your Proxy or Network!')
-
-
-def post_html(url: str, query: dict) -> requests.Response:
- proxy, timeout, retry_count = get_network_settings()
-
- if proxy:
- proxies = {"http": "http://" + proxy, "https": "https://" + proxy}
- else:
- proxies = {}
-
- for i in range(retry_count):
- try:
- result = requests.post(url, data=query, proxies=proxies)
- return result
- except requests.exceptions.ProxyError:
- print("[-]Connect retry {}/{}".format(i+1, retry_count))
- print("[-]Connect Failed! Please check your Proxy or Network!")
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import requests
+from configparser import ConfigParser
+import os
+import re
+import time
+import sys
+from lxml import etree
+import sys
+import io
+from ConfigApp import ConfigApp
+# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
+# sys.setdefaultencoding('utf-8')
+
+# config_file='config.ini'
+# config = ConfigParser()
+
+# if os.path.exists(config_file):
+# try:
+# config.read(config_file, encoding='UTF-8')
+# except:
+#         print('[-]Config.ini read failed! Please use the official file!')
+# else:
+# print('[+]config.ini: not found, creating...',end='')
+# with open("config.ini", "wt", encoding='UTF-8') as code:
+# print("[common]", file=code)
+# print("main_mode = 1", file=code)
+# print("failed_output_folder = failed", file=code)
+# print("success_output_folder = JAV_output", file=code)
+# print("", file=code)
+# print("[proxy]",file=code)
+# print("proxy=127.0.0.1:1081",file=code)
+# print("timeout=10", file=code)
+# print("retry=3", file=code)
+# print("", file=code)
+# print("[Name_Rule]", file=code)
+# print("location_rule=actor+'/'+number",file=code)
+# print("naming_rule=number+'-'+title",file=code)
+# print("", file=code)
+# print("[update]",file=code)
+# print("update_check=1",file=code)
+# print("", file=code)
+# print("[media]", file=code)
+# print("media_warehouse=emby", file=code)
+# print("#emby plex kodi", file=code)
+# print("", file=code)
+# print("[escape]", file=code)
+# print("literals=\\", file=code)
+# print("", file=code)
+# print("[movie_location]", file=code)
+# print("path=", file=code)
+# print("", file=code)
+# print('.',end='')
+# time.sleep(2)
+# print('.')
+# print('[+]config.ini: created!')
+# print('[+]Please restart the program!')
+# time.sleep(4)
+# os._exit(0)
+# try:
+# config.read(config_file, encoding='UTF-8')
+# except:
+#     print('[-]Config.ini read failed! Please use the official file!')
+
+config = ConfigApp()
+
+
+def get_network_settings():
+ try:
+ proxy = config.proxy
+ timeout = int(config.timeout)
+ retry_count = int(config.retry)
+ assert timeout > 0
+ assert retry_count > 0
+ except:
+ raise ValueError("[-]Proxy config error! Please check the config.")
+ return proxy, timeout, retry_count
+
+def getDataState(json_data):  # detect a failed metadata fetch
+ if json_data['title'] == '' or json_data['title'] == 'None' or json_data['title'] == 'null':
+ return 0
+ else:
+ return 1
+
+def ReadMediaWarehouse():
+ return config.media_server
+
+def UpdateCheckSwitch():
+    check = str(config.update_check)
+    if check == '1':
+        return '1'
+    # '0', empty, or any other value counts as disabled
+    return '0'
+
+def getXpathSingle(htmlcode,xpath):
+ html = etree.fromstring(htmlcode, etree.HTMLParser())
+ result1 = str(html.xpath(xpath)).strip(" ['']")
+ return result1
+
+def get_html(url, cookies=None):  # core web-page fetcher
+ proxy, timeout, retry_count = get_network_settings()
+ i = 0
+ print(url)
+ while i < retry_count:
+ try:
+ if not proxy == '':
+ proxies = {"http": proxy, "https": proxy}
+ headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3100.0 Safari/537.36'}
+ getweb = requests.get(str(url), headers=headers, timeout=timeout, proxies=proxies, cookies=cookies)
+ getweb.encoding = 'utf-8'
+ return getweb.text
+ else:
+ headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'}
+ getweb = requests.get(str(url), headers=headers, timeout=timeout, cookies=cookies)
+ getweb.encoding = 'utf-8'
+ return getweb.text
+ except Exception as e:
+ print(e)
+ i += 1
+ print('[-]Connect retry '+str(i)+'/'+str(retry_count))
+ print('[-]Connect Failed! Please check your Proxy or Network!')
+
+
diff --git a/AV_Data_Capture.py b/AV_Data_Capture.py
index 547595e..1994529 100755
--- a/AV_Data_Capture.py
+++ b/AV_Data_Capture.py
@@ -1,162 +1,416 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-import glob
-import os
-import time
-import re
-from ADC_function import *
-from core import *
-import json
-import shutil
-from configparser import ConfigParser
-import argparse
-
-
-def UpdateCheck(version):
- if UpdateCheckSwitch() == '1':
- html2 = get_html('https://raw.githubusercontent.com/yoshiko2/AV_Data_Capture/master/update_check.json')
- html = json.loads(str(html2))
-
- if not version == html['version']:
- print('[*] * New update ' + html['version'] + ' *')
- print('[*] ↓ Download ↓')
- print('[*] ' + html['download'])
- print('[*]======================================================')
- else:
- print('[+]Update Check disabled!')
-
-def argparse_get_file():
- parser = argparse.ArgumentParser()
- parser.add_argument("file", default='',nargs='?', help="Write the file path on here")
- args = parser.parse_args()
- if args.file == '':
- return ''
- else:
- return args.file
-
-def movie_lists(escape_folder):
- escape_folder = re.split('[,,]', escape_folder)
- total = []
- file_type = ['.mp4', '.avi', '.rmvb', '.wmv', '.mov', '.mkv', '.flv', '.ts', '.webm', '.MP4', '.AVI', '.RMVB', '.WMV','.MOV', '.MKV', '.FLV', '.TS', '.WEBM', ]
- file_root = os.getcwd()
- for root, dirs, files in os.walk(file_root):
- flag_escape = 0
- for folder in escape_folder:
- if folder in root:
- flag_escape = 1
- break
- if flag_escape == 1:
- continue
- for f in files:
- if os.path.splitext(f)[1] in file_type:
- path = os.path.join(root, f)
- path = path.replace(file_root, '.')
- total.append(path)
- return total
-
-
-def CreatFailedFolder(failed_folder):
- if not os.path.exists(failed_folder + '/'): # 新建failed文件夹
- try:
- os.makedirs(failed_folder + '/')
- except:
- print("[-]failed!can not be make folder 'failed'\n[-](Please run as Administrator)")
- os._exit(0)
-
-
-def CEF(path):
- try:
- files = os.listdir(path) # 获取路径下的子文件(夹)列表
- for file in files:
- os.removedirs(path + '/' + file) # 删除这个空文件夹
- print('[+]Deleting empty folder', path + '/' + file)
- except:
- a = ''
-
-
-def getNumber(filepath,absolute_path = False):
- if absolute_path == True:
- filepath=filepath.replace('\\','/')
- file_number = str(re.findall(r'(.+?)\.', str(re.search('([^<>/\\\\|:""\\*\\?]+)\\.\\w+$', filepath).group()))).strip("['']").replace('_', '-')
- return file_number
- if '-' in filepath or '_' in filepath: # 普通提取番号 主要处理包含减号-和_的番号
- filepath = filepath.replace("_", "-")
- filepath.strip('22-sht.me').strip('-HD').strip('-hd')
- filename = str(re.sub("\[\d{4}-\d{1,2}-\d{1,2}\] - ", "", filepath)) # 去除文件名中时间
- if 'FC2' or 'fc2' in filename:
- filename = filename.replace('-PPV', '').replace('PPV-', '').replace('FC2PPV-','FC2-').replace('FC2PPV_','FC2-')
- file_number = re.search(r'\w+-\w+', filename, re.A).group()
- return file_number
- else: # 提取不含减号-的番号,FANZA CID
- try:
- return str(re.findall(r'(.+?)\.', str(re.search('([^<>/\\\\|:""\\*\\?]+)\\.\\w+$', filepath).group()))).strip("['']").replace('_', '-')
- except:
- return re.search(r'(.+?)\.', filepath)[0]
-
-
-if __name__ == '__main__':
- version = '2.8.2'
- config_file = 'config.ini'
- config = ConfigParser()
- config.read(config_file, encoding='UTF-8')
- success_folder = config['common']['success_output_folder']
- failed_folder = config['common']['failed_output_folder'] # 失败输出目录
- escape_folder = config['escape']['folders'] # 多级目录刮削需要排除的目录
- print('[*]================== AV Data Capture ===================')
- print('[*] Version ' + version)
- print('[*]======================================================')
-
- UpdateCheck(version)
- CreatFailedFolder(failed_folder)
- os.chdir(os.getcwd())
- movie_list = movie_lists(escape_folder)
-
- #========== 野鸡番号拖动 ==========
- number_argparse=argparse_get_file()
- if not number_argparse == '':
- print("[!]Making Data for [" + number_argparse + "], the number is [" + getNumber(number_argparse,absolute_path = True) + "]")
- core_main(number_argparse, getNumber(number_argparse,absolute_path = True))
- print("[*]======================================================")
- CEF(success_folder)
- CEF(failed_folder)
- print("[+]All finished!!!")
- input("[+][+]Press enter key exit, you can check the error messge before you exit.")
- os._exit(0)
- # ========== 野鸡番号拖动 ==========
-
- count = 0
- count_all = str(len(movie_list))
- print('[+]Find', count_all, 'movies')
- if config['common']['soft_link'] == '1':
- print('[!] --- Soft link mode is ENABLE! ----')
- for i in movie_list: # 遍历电影列表 交给core处理
- count = count + 1
- percentage = str(count / int(count_all) * 100)[:4] + '%'
- print('[!] - ' + percentage + ' [' + str(count) + '/' + count_all + '] -')
- # print("[!]Making Data for [" + i + "], the number is [" + getNumber(i) + "]")
- # core_main(i, getNumber(i))
- # print("[*]======================================================")
- try:
- print("[!]Making Data for [" + i + "], the number is [" + getNumber(i) + "]")
- core_main(i, getNumber(i))
- print("[*]======================================================")
- except: # 番号提取异常
- print('[-]' + i + ' Cannot catch the number :')
- if config['common']['soft_link'] == '1':
- print('[-]Link', i, 'to failed folder')
- os.symlink(i, str(os.getcwd()) + '/' + failed_folder + '/')
- else:
- try:
- print('[-]Move ' + i + ' to failed folder')
- shutil.move(i, str(os.getcwd()) + '/' + failed_folder + '/')
- except FileExistsError:
- print('[!]File exists in failed!')
- except:
- print('[+]skip')
- continue
-
- CEF(success_folder)
- CEF(failed_folder)
- print("[+]All finished!!!")
- input("[+][+]Press enter key exit, you can check the error messge before you exit.")
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import glob
+import os
+import time
+import fuckit
+from tenacity import retry, stop_after_delay, wait_fixed
+import json
+import shutil
+import itertools
+import argparse
+from pathlib import Path
+
+from core import *
+from ConfigApp import ConfigApp
+from PathNameProcessor import PathNameProcessor
+
+# TODO encapsulate, aggregate, decouple: CORE
+# TODO (learning) unify the dependency-management tooling
+# TODO share one metadata format (nfo, posters, ...) across media servers (emby, jellyfin, plex)
+# TODO subtitle organizer: read all subtitles in a folder, extract their codes and drop them into the matching TEMP cache folder
+
+config = ConfigApp()
+
+
+def safe_list_get(list_in, idx, default=None):
+    """
+    Index into a list without raising IndexError.
+    :param list_in: source list
+    :param idx: index to fetch
+    :param default: value returned when idx is out of range
+    :return: list_in[idx] or default
+    """
+    try:
+        return list_in[idx]
+    except IndexError:
+        return default
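+# e.g. safe_list_get([1, 2], 5) -> None, instead of raising IndexError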
+
+
+def UpdateCheck(version):
+ if UpdateCheckSwitch() == '1':
+ html2 = get_html('https://raw.githubusercontent.com/yoshiko2/AV_Data_Capture/master/update_check.json')
+ html = json.loads(str(html2))
+
+ if not version == html['version']:
+ print('[*] * New update ' + html['version'] + ' *')
+ print('[*] ↓ Download ↓')
+ print('[*] ' + html['download'])
+ print('[*]======================================================')
+ else:
+ print('[+]Update Check disabled!')
+
+
+def argparse_get_file():
+ parser = argparse.ArgumentParser()
+ parser.add_argument("file", default='', nargs='?', help="Write the file path on here")
+ args = parser.parse_args()
+ if args.file == '':
+ return ''
+ else:
+ return args.file
+
+
+def movie_lists(escape_folders):
+ escape_folders = re.split('[,,]', escape_folders)
+ total = []
+
+ for root, dirs, files in os.walk(config.search_folder):
+ if root in escape_folders:
+ continue
+ for file in files:
+ if re.search(PathNameProcessor.pattern_of_file_name_suffixes, file, re.IGNORECASE):
+ path = os.path.join(root, file)
+ total.append(path)
+ return total
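+# movie_lists walks config.search_folder and keeps files whose extension matches
+# PathNameProcessor.pattern_of_file_name_suffixes; illustratively it returns paths like
+# ['<search_folder>/ABC-123.mp4', '<search_folder>/sub/XYZ-001.mkv']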
+
+
+# def CEF(path):
+#     try:
+#         files = os.listdir(path)  # list the children (files/folders) of path
+#         for file in files:
+#             os.removedirs(path + '/' + file)  # delete this empty folder
+#             print('[+]Deleting empty folder', path + '/' + file)
+#     except:
+#         a = ''
+#
+
+
+def get_numbers(paths):
+    """Extract the code number and episode for each path"""
+
+    def get_number(filepath, absolute_path=False):
+        """
+        Get the code number and episode of a single path
+        :param filepath:
+        :param absolute_path:
+        :return:
+        """
+        name = filepath.upper()  # uppercase for uniform matching
+        if absolute_path:
+            name = name.replace('\\', '/')
+        # strip distracting fields
+        name = PathNameProcessor.remove_distractions(name)
+        # pull a possible trailing episode number off the path, keeping the trimmed path
+        suffix_episode, name = PathNameProcessor.extract_suffix_episode(name)
+        # pull a possible episode trailing the code itself, plus the cleaned code
+        episode_behind_code, code_number = PathNameProcessor.extract_code(name)
+        # no code found -> empty string
+        code_number = code_number if code_number else ''
+        # prefer the trailing episode, then the episode behind the code (rare), else empty
+        episode = suffix_episode if suffix_episode else episode_behind_code if episode_behind_code else ''
+
+        return code_number, episode
+
+    maps = {}
+    for path in paths:
+        number, episode = get_number(path)
+        maps[path] = (number, episode)
+
+    return maps
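+# Illustrative result, assuming the extractors behave as documented:
+# get_numbers(['/x/ABC-123-2.MP4']) -> {'/x/ABC-123-2.MP4': ('ABC-123', '2')}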
+
+
+def create_folder(paths):
+    for path_to_make in paths:
+        if path_to_make:
+            try:
+                os.makedirs(path_to_make)
+            except FileExistsError:
+                # name = f'{folder=}'.split('=')[0].split('.')[-1]
+                print(path_to_make + " already exists")
+            except Exception as exception:
+                print('! Failed to create folder ' + path_to_make + ': bad path or insufficient permissions')
+                raise exception
+        else:
+            raise Exception('! The folder path to create is empty, please check')
+
+
+if __name__ == '__main__':
+ version = '2.8.2'
+
+ print('[*]================== AV Data Capture ===================')
+ print('[*] Version ' + version)
+ print('[*]======================================================')
+
+ # UpdateCheck(version)
+
+ CreatFailedFolder(config.failed_folder)
+ os.chdir(os.getcwd())
+
+    # create the working folders
+    create_folder([config.failed_folder, config.search_folder, config.temp_folder])
+
+    # inside temp: infos holds the per-code json metadata, pics holds the images
+    path_infos = config.temp_folder + '/infos'
+    path_pics = config.temp_folder + '/pics'
+
+ create_folder([path_infos, path_pics])
+
+    # walk the search folder and collect every video path
+    movie_list = movie_lists(config.escape_folder)
+
+    # test data extracted from text files:
+    # f = open('TestPathNFO.txt', 'r')
+    # f = open('TestPathSpecial.txt', 'r')
+    # movie_list = [line[:-1] for line in f.readlines()]
+    # f.close()
+
+    # turn the {path: (code, episode)} dict into a [code, episode, path] list
+    code_ep_paths = [[codeEposode[0], codeEposode[1], path] for path, codeEposode in get_numbers(movie_list).items()]
+    [print(i) for i in code_ep_paths]
+    # group the movie list by code (the key step), to find movies sharing a code
+    '''
+    pandas grouping would also work here:
+    "https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html"
+    '''
+    # # show all columns when printing
+    # pd.set_option('display.max_columns', None)
+    # # show all rows
+    # pd.set_option('display.max_rows', None)
+    # # cap the displayed value width at 30 (default 50)
+    # pd.set_option('max_colwidth', 30)
+    # # build the frame
+    # df = pd.DataFrame(code_ep_paths, columns=('code', 'ep', 'path'))
+    # # group by code
+    # groupedCode_code_ep_paths = df.groupby(['code'])
+    # # print(df.groupby(['code', 'ep']).describe().unstack())
+    # grouped_code_ep = df.groupby(['code', 'ep'])['path']
+    #
+ sorted_code_list = sorted(code_ep_paths, key=lambda code_ep_path: code_ep_path[0])
+ group_code_list = itertools.groupby(sorted_code_list, key=lambda code_ep_path: code_ep_path[0])
+
+
+    def group_code_list_to_dict(group_code_list):
+        data_dict = {}
+        for code, code_ep_path_group in group_code_list:
+            code_ep_path_list = list(code_ep_path_group)
+            eps_of_code = {}
+            group_ep_list = itertools.groupby(code_ep_path_list, key=lambda code_ep_path: code_ep_path[1])
+            for ep, group_ep_group in group_ep_list:
+                ep_rows = list(group_ep_group)  # avoid rebinding the iterator being looped over
+                eps_of_code[ep] = [code_ep_path[2] for code_ep_path in ep_rows]
+            data_dict[code] = eps_of_code
+
+        return data_dict
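+    # Illustrative shape: three files of ABC-123, one of them episode 2, become
+    # {'ABC-123': {'': ['./a/ABC-123.mp4', './b/ABC-123.mp4'], '2': ['./a/ABC-123-2.mp4']}}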
+
+
+    def print_same_code_ep_path(data_dict_in):
+        for code_in in data_dict_in:
+            ep_path_list = data_dict_in[code_in]
+            if len(ep_path_list) > 1:
+                print('--' * 60)
+                print("|" + (code_in if code_in else 'unknown') + ":")
+
+                # group_ep_list = itertools.groupby(code_ep_path_list.items(), key=lambda code_ep_path: code_ep_path[0])
+                for ep in ep_path_list:
+                    path_list = ep_path_list[ep]
+                    print('--' * 12)
+                    ep = ep if ep else ' '
+                    if len(path_list) == 1:
+                        print('| episode:' + ep + ' file: ' + path_list[0])
+                    else:
+                        print('| episode:' + ep + ' files: ')
+                        for path in path_list:
+                            print('|     ' + path)
+
+            else:
+                pass
+
+
+    # the grouped data: {code: {ep: [path]}}
+    data_dict_groupby_code_ep = group_code_list_to_dict(group_code_list)
+
+    print('--' * 100)
+    print("Movies found: " + str(len(movie_list)))
+    print("Distinct codes: " + str(len(data_dict_groupby_code_ep)) + " (movies sharing a code count once; unrecognized codes are grouped under 'unknown')")
+    print('Warning:!!!! the movies below share a code')
+    print('◤' + '--' * 80)
+    print_same_code_ep_path(data_dict_groupby_code_ep)
+    print('◣' + '--' * 80)
+
+    isContinue = input('Press Enter to continue, or N to exit\n')
+    if isContinue.strip(' ') == "N":
+        exit(1)
+
+
+    # ========== drag-in of a single file with an odd code ==========
+ # number_argparse = argparse_get_file()
+ # if not number_argparse == '':
+ # print("[!]Making Data for [" + number_argparse + "], the number is [" + getNumber(number_argparse,
+ # absolute_path=True) + "]")
+ # nfo = core_main(number_argparse, getNumber(number_argparse, absolute_path=True))
+ # print("[*]======================================================")
+ # CEF(config.success_folder)
+ # CEF(config.failed_folder)
+ # print("[+]All finished!!!")
+ # input("[+][+]Press enter key exit, you can check the error messge before you exit.")
+ # os._exit(0)
+    # ========== drag-in of a single file with an odd code ==========
+
+    def download_code_infos(code_list, is_read_cache=True):
+        """
+        Walk the codes grouped above, scrape each code's metadata and cache it.
+
+        :param is_read_cache: whether to reuse cached data
+        :param code_list:
+        :return: {code: nfo}
+        """
+        count_all_grouped = len(code_list)
+        count = 0
+        code_info_dict = {}
+
+        for code in code_list:
+            count = count + 1
+            percentage = str(count / int(count_all_grouped) * 100)[:4] + '%'
+            print('[!] - ' + percentage + ' [' + str(count) + '/' + str(count_all_grouped) + '] -')
+            try:
+                print("[!]Scraping data for [" + code + "]")
+                if code:
+                    # cache file for this code
+                    file_path = path_infos + '/' + code + '.json'
+                    nfo = {}
+                    # read the cached info; scrape online when there is none
+
+                    path = Path(file_path)
+                    if is_read_cache and (path.exists() and path.is_file() and path.stat().st_size > 0):
+                        print('Found cached info')
+                        with open(file_path) as fp:
+                            nfo = json.load(fp)
+                    else:
+
+                        # core feature - scrape the info dict from the web
+                        print('Scraping online')
+                        nfo = core_main(code)
+                        print('Writing', end='')
+
+                        # write the info into the cache folder; the file is occasionally
+                        # held by another process, in which case retrying is enough
+                        @retry(stop=stop_after_delay(3), wait=wait_fixed(2))
+                        def write_file():
+                            with open(file_path, 'w') as fp:
+                                json.dump(nfo, fp)
+
+                        write_file()
+                        print(' done!')
+                    # stash the info in the dict
+                    code_info_dict[code] = nfo
+                    print("[*]======================================================")
+
+            except Exception as e:  # fetching this code's info failed
+                code_info_dict[code] = ''
+                print("No info found for: " + code + ', Reason: ' + str(e))
+
+            # if config.soft_link:
+            #     print('[-]Link', file_path_name, 'to failed folder')
+            #     os.symlink(file_path_name, config.failed_folder + '/')
+            # else:
+            #     try:
+            #         print('[-]Move ' + file_path_name + ' to failed folder:' + config.failed_folder)
+            #         shutil.move(file_path_name, config.failed_folder + '/')
+            #     except FileExistsError:
+            #         print('[!]File exists in failed!')
+            #     except:
+            #         print('[+]skip')
+            continue
+        return code_info_dict
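+    # Cache layout: each code's metadata lands in <temp_folder>/infos/<code>.json, so a
+    # re-run with is_read_cache=True can skip the network entirely.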
+
+
+    print('----------------------------------')
+    code_infos = download_code_infos(data_dict_groupby_code_ep)
+    print("----codes with no data found----")
+    [print(code) for code in code_infos if code_infos[code] == '']
+    print("-------------------------")
+
+
+    def download_images_of_nfos(code_info_dict):
+        """
+        Walk the per-code info and download each movie's poster and thumbnail.
+        :param code_info_dict:
+        :return: codes whose info carries no images
+        """
+
+        code_list_empty_image = []
+        for code in code_info_dict:
+            nfo = code_info_dict[code]
+            if not nfo:  # failed scrapes are stored as '' and carry no images
+                code_list_empty_image.append(code)
+                continue
+
+            code_pics_folder_to_save = path_pics + '/' + code
+            # 1 create the code's folder
+            os.makedirs(code_pics_folder_to_save, exist_ok=True)
+            # download the thumbnail
+            if nfo['imagecut'] == 3:  # 3 means thumbnail
+                path = Path(code_pics_folder_to_save + '/' + 'thumb.png')
+                if path.exists() and path.is_file() and path.stat().st_size > 0:
+                    print(code + ': thumbnail already cached')
+                else:
+                    print(code + ': downloading thumbnail...')
+                    download_file(nfo['cover_small'], code_pics_folder_to_save, 'thumb.png')
+                    print(code + ': thumbnail done')
+            # download the poster
+            path = Path(code_pics_folder_to_save + '/' + 'poster.png')
+            if path.exists() and path.is_file() and path.stat().st_size > 0:
+                print(code + ': poster already cached')
+            else:
+                print(code + ': downloading poster...')
+                download_file(nfo['cover'], code_pics_folder_to_save, 'poster.png')
+                print(code + ': poster done')
+        return code_list_empty_image
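+    # Image cache layout mirrors the info cache: <temp_folder>/pics/<code>/thumb.png and
+    # <temp_folder>/pics/<code>/poster.png are checked before any re-download.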
+
+
+
+    code_list_empty = download_images_of_nfos(code_infos)
+    print("----codes whose info had no images----")
+    [print(code) for code in code_list_empty]
+    print("------re-scraping those codes------")
+    code_infos_of_no_ep = download_code_infos(code_list_empty, is_read_cache=False)
+    print("----codes still missing data----")
+    [print(code) for code in code_infos_of_no_ep if code_infos_of_no_ep[code] == '']
+    print("----------------------")
+    # next steps
+    # # 2 create the thumbnail poster
+    # if nfo['imagecut'] == 3:  # 3 means thumbnail
+    #     download_cover_file(nfo['cover_small'], code, code_pics_folder_to_save)
+    # # 3 create the image
+    # download_image(nfo['cover'], code, code_pics_folder_to_save)
+    # # 4 crop
+    # crop_image(nfo['imagecut'], code, code_pics_folder_to_save)
+    # # 5 background image
+    # copy_images_to_background_image(code, code_pics_folder_to_save)
+    # 6 create name.nfo (not needed; convert the cached json in infos to nfo on demand)
+    # make_nfo_file(nfo, code, temp_path_to_save)
+    # duplicate-code handling: append -CD[X] by episode; split by video format and size
+    # TODO approach 1, scraping: add nfo, covers, screenshots, etc.
+    # 6 create name.nfo (not needed; convert the cached json in infos to nfo on demand)
+    # make_nfo_file(nfo, code, temp_path_to_save)  # would raise NameError here; kept for reference
+    # TODO approach 2, organizing: move movies and subtitles into actor/studio/censored folders per rules
+
+    # if config.program_mode == '1':
+    #     if multi_part == 1:
+    #         number += part  # number gets the CD1 suffix appended here
+    #     smallCoverCheck(path, number, imagecut, json_data['cover_small'], c_word, option, filepath, config.failed_folder)  # check the small cover
+    #     imageDownload(option, json_data['cover'], number, c_word, path, multi_part, filepath, config.failed_folder)  # creatFoder returns the code's path
+    #     cutImage(option, imagecut, path, number, c_word)  # crop the image
+    #     copyRenameJpgToBackdrop(option, path, number, c_word)
+    #     PrintFiles(option, path, c_word, json_data['naming_rule'], part, cn_sub, json_data, filepath, config.failed_folder, tag)  # write the .nfo
+    #     pasteFileToFolder(filepath, path, number, c_word)  # move the file
+    # # =======================================================================organize mode
+    # elif config.program_mode == '2':
+    #     pasteFileToFolder_mode2(filepath, path, multi_part, number, part, c_word)  # move the file
+
+ # CEF(config.success_folder)
+ # CEF(config.failed_folder)
+ print("[+]All finished!!!")
+ input("[+][+]Press enter key exit, you can check the error message before you exit.")
diff --git a/ConfigApp.py b/ConfigApp.py
new file mode 100755
index 0000000..0d8c835
--- /dev/null
+++ b/ConfigApp.py
@@ -0,0 +1,28 @@
+from configparser import ConfigParser
+
+from MediaServer import MediaServer
+
+
+class ConfigApp:
+    def __init__(self):
+        config_file = 'config.ini'
+        config = ConfigParser()
+        config.read(config_file, encoding='UTF-8')
+        self.success_folder = config['common']['success_output_folder']
+        self.failed_folder = config['common']['failed_output_folder']  # output folder for failures
+        self.escape_folder = config['escape']['folders']  # folders excluded from multi-level scraping
+        self.search_folder = config['common']['search_folder']  # folder to search
+        self.temp_folder = config['common']['temp_folder']  # temporary resource folder
+        self.soft_link = (config['common']['soft_link'] == '1')  # ConfigParser values are strings
+        # self.escape_literals = (config['escape']['literals'] == 1)
+        self.naming_rule = config['Name_Rule']['naming_rule']
+        self.location_rule = config['Name_Rule']['location_rule']
+
+ self.proxy = config['proxy']['proxy']
+ self.timeout = float(config['proxy']['timeout'])
+ self.retry = int(config['proxy']['retry'])
+ self.media_server = MediaServer[config['media']['media_warehouse']]
+ self.update_check = config['update']['update_check']
+ self.debug_mode = config['debug_mode']['switch']
+
+
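+# A config.ini sketch covering every key read above (values are illustrative):
+#
+# [common]
+# success_output_folder = JAV_output
+# failed_output_folder = failed
+# search_folder = .
+# temp_folder = temp
+# soft_link = 0
+#
+# [proxy]
+# proxy = http://127.0.0.1:1081
+# timeout = 10
+# retry = 3
+#
+# [Name_Rule]
+# location_rule = actor+'/'+number
+# naming_rule = number+'-'+title
+#
+# [media]
+# media_warehouse = EMBY
+#
+# [update]
+# update_check = 1
+#
+# [escape]
+# folders = failed,JAV_output
+#
+# [debug_mode]
+# switch = 0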
diff --git a/LICENSE b/LICENSE
old mode 100644
new mode 100755
diff --git a/LearningNote/GroupbyDemo.py b/LearningNote/GroupbyDemo.py
new file mode 100644
index 0000000..558a415
--- /dev/null
+++ b/LearningNote/GroupbyDemo.py
@@ -0,0 +1,19 @@
+import pandas as pd
+import numpy as np
+
+df = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar',
+ 'foo', 'bar', 'foo', 'foo'],
+ 'B': ['one', 'one', 'two', 'three',
+ 'two', 'two', 'one', 'three'],
+ 'C': np.random.randn(8),
+ 'D': np.random.randn(8)})
+
+print(df)
+groupedA = df.groupby('A').describe()
+groupedAB = df.groupby(['A', 'B'])['C']
+print('---'*18)
+for a, b in groupedAB:
+ print('--'*18)
+ print(a)
+ print('-' * 18)
+ print(b)
diff --git a/LearningNote/PandasDemo.py b/LearningNote/PandasDemo.py
new file mode 100644
index 0000000..0ed8aad
--- /dev/null
+++ b/LearningNote/PandasDemo.py
@@ -0,0 +1,38 @@
+import pandas as pd
+import numpy as np
+
+'''
+pandas, one of the big three Python data-processing libraries
+https://pandas.pydata.org/pandas-docs/stable/user_guide
+https://www.pypandas.cn/docs/getting_started/10min.html
+'''
+
+dates = pd.date_range('20130101', periods=6)
+df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))
+print(dates)
+print(df)
+
+df2 = pd.DataFrame({'A': 1.,
+ 'B': pd.Timestamp('20130102'),
+ 'C': pd.Series(1, index=list(range(4)), dtype='float32'),
+ 'D': np.array([3] * 4, dtype='int32'),
+ 'E': pd.Categorical(["test", "train", "test", "train"]),
+ 'F': 'foo'})
+print(df2)
+print(df2.dtypes)
+print(df.head())
+print(df.tail(5))
+print(df.index)
+print(df.columns)
+df.describe()  # summary statistics
+df.T  # transpose: swap index and columns
+df.sort_index(axis=1, ascending=False)  # sort: axis=1 sorts the columns, axis=0 sorts the index
+df.sort_values(by='B')  # sort by the values in column B
+
+# select a column
+df.A
+df['A']
+# slice rows
+df['20130102':'20130104']
+df[0:3]
+
diff --git a/MediaServer.py b/MediaServer.py
new file mode 100644
index 0000000..52e1530
--- /dev/null
+++ b/MediaServer.py
@@ -0,0 +1,28 @@
+from enum import Enum, auto
+
+
+class MediaServer(Enum):
+ EMBY = auto()
+ PLEX = auto()
+ KODI = auto()
+
+ # media = EMBY
+ #
+ # def __init__(self, arg):
+ # self = [e for e in MediaServer if arg.upper() == self.name]
+
+    def poster_name(self, name):
+        if self == MediaServer.EMBY:  # save as [name].png
+            return name + '.png'
+        elif self == MediaServer.KODI:  # save as [name]-poster.jpg
+            return name + '-poster.jpg'
+        elif self == MediaServer.PLEX:  # save as poster.jpg
+            return 'poster.jpg'
+
+    def image_name(self, name):
+        if self == MediaServer.EMBY:  # name.jpg
+            return name + '.jpg'
+        elif self == MediaServer.KODI:  # [name]-fanart.jpg
+            return name + '-fanart.jpg'
+        elif self == MediaServer.PLEX:  # fanart.jpg
+            return 'fanart.jpg'
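+
+# Usage sketch: members are looked up by name, which is how ConfigApp maps the
+# config value, e.g. MediaServer['EMBY'].poster_name('ABC-123') -> 'ABC-123.png'
+# and MediaServer['PLEX'].image_name('ABC-123') -> 'fanart.jpg'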
diff --git a/Metadate.py b/Metadate.py
new file mode 100644
index 0000000..9acf3c5
--- /dev/null
+++ b/Metadate.py
@@ -0,0 +1,3 @@
+from addict import Dict
+
+# class Metadata:
diff --git a/PathNameProcessor.py b/PathNameProcessor.py
new file mode 100644
index 0000000..fd87842
--- /dev/null
+++ b/PathNameProcessor.py
@@ -0,0 +1,115 @@
+import re
+
+import fuckit
+
+
+class PathNameProcessor:
+    # class attribute: recognized video-file suffixes
+    pattern_of_file_name_suffixes = r'\.(mov|mp4|avi|rmvb|wmv|mkv|flv|ts|m2ts)$'
+
+ # def __init__(self):
+
+    @staticmethod
+    def remove_distractions(origin_name):
+        """Remove distracting fields from the name"""
+        # strip the file-type suffix
+        origin_name = re.sub(PathNameProcessor.pattern_of_file_name_suffixes, '', origin_name, 0, re.IGNORECASE)
+
+        # normalize codes containing '-' or '_', e.g. '/-070409_621'
+        origin_name = re.sub(r'[-_~*# ]', "-", origin_name, 0)
+
+        origin_name = re.sub(r'(Carib)(bean)?', '-', origin_name, 0, re.IGNORECASE)
+        origin_name = re.sub(r'(1pondo)', '-', origin_name, 0, re.IGNORECASE)
+        origin_name = re.sub(r'(tokyo)[-. ]?(hot)', '-', origin_name, 0, re.IGNORECASE)
+        origin_name = re.sub(r'Uncensored', '-', origin_name, 0, re.IGNORECASE)
+        origin_name = re.sub(r'JAV', '-', origin_name, 0, re.IGNORECASE)
+        # strip known junk fields
+        origin_name = origin_name.replace('22-sht.me', '-')
+
+        # strip dates from the name: years 1970-2099, with optional month and day
+        pattern_of_date = r'(?:-)(19[789]\d|20\d{2})(-?(0\d|1[012])-?(0[1-9]|[12]\d|3[01])?)?[-.]'
+        origin_name = re.sub(pattern_of_date, '-', origin_name, 0)
+        # strip letter-led resolution/definition markers; the original pattern was lost to
+        # garbling, so the expression below is an assumed stand-in
+        pattern_of_resolution_alphas = r'(?<![A-Z0-9])(FHD|UHD|HD|SD|4K|2160P|1080P|720P|480P)(?![A-Z0-9])'
+        origin_name = re.sub(pattern_of_resolution_alphas, '-', origin_name, 0, re.IGNORECASE)
+        return origin_name
+
+    @staticmethod
+    def extract_suffix_episode(name):
+        """Pull a trailing episode marker (e.g. -CD2, -2, -B) off the path and return
+        (episode, name_without_marker). Reconstructed sketch: the original body was lost."""
+        episode = None
+        searched = re.search(r'-(?:CD)?(?P<ep>\d{1,2}|[A-Z])$', name, re.IGNORECASE)
+        if searched:
+            episode = searched.group('ep')
+            name = name[:searched.start()]
+        return episode, name
+
+    @staticmethod
+    def extract_code(name):
+        """Extract (episode_behind_code, code_number) from a cleaned name.
+        The lines above the leading-zero step are a reconstructed sketch."""
+        searched = re.search(r'[A-Z]{2,}-\d{2,}', name, re.IGNORECASE)
+        if not searched:
+            return None, None
+        code = searched.group()
+        episode = PathNameProcessor.extract_episode_behind_code(name, code)
+        name = code
+        # strip leading zeros from the code: NTTR-0037 -> NTTR-037, SIVR-00008 -> SIVR-008; heyzo is exempt
+ if "heyzo" not in name.lower():
+ searched = re.search(r'([a-zA-Z]{2,})-(?:0*)(\d{3,})', name)
+ if searched:
+ name = '-'.join(searched.groups())
+
+ return episode, name
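+    # The leading-zero strip above, illustratively:
+    # re.search(r'([a-zA-Z]{2,})-(?:0*)(\d{3,})', 'SIVR-00008').groups() -> ('SIVR', '008')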
+
+ @staticmethod
+ def extract_episode_behind_code(origin_name, code):
+ episode = None
+
+ with fuckit:
+            # zero-width lookbehind: grab a single trailing letter or digit as the episode
+            result_dict = re.search(rf'(?<={code})-?((?P<alpha>([A-Z](?![A-Z])))|(?P<num>\d(?!\d)))', origin_name,
+                                    re.I).groupdict()
+ episode = result_dict['alpha'] or result_dict['num']
+ return episode
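+    # e.g. extract_episode_behind_code('ABC-123B', 'ABC-123') -> 'B'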
+
+
+def safe_list_get(list_in, idx, default):
+ try:
+ return list_in[idx]
+ except IndexError:
+ return default
diff --git a/Pipfile b/Pipfile
new file mode 100644
index 0000000..cca1b93
--- /dev/null
+++ b/Pipfile
@@ -0,0 +1,19 @@
+[[source]]
+name = "pypi"
+url = "https://pypi.org/simple"
+verify_ssl = true
+
+[dev-packages]
+
+[packages]
+bs4 = "*"
+tenacity = "*"
+fuckit = "*"
+requests = "*"
+image = "*"
+lazyxml = {editable = true,git = "https://github.com/waynedyck/lazyxml.git",ref = "python-3-conversion_wd1"}
+lxml = "*"
+pyquery = "*"
+
+[requires]
+python_version = "3.8"
diff --git a/Pipfile.lock b/Pipfile.lock
new file mode 100644
index 0000000..1ca43ea
--- /dev/null
+++ b/Pipfile.lock
@@ -0,0 +1,246 @@
+{
+ "_meta": {
+ "hash": {
+ "sha256": "15bf3c6af3ec315358a0217481a13285f95fc742bb5db8a1f934e0d1c3d7d5e2"
+ },
+ "pipfile-spec": 6,
+ "requires": {
+ "python_version": "3.8"
+ },
+ "sources": [
+ {
+ "name": "pypi",
+ "url": "https://pypi.org/simple",
+ "verify_ssl": true
+ }
+ ]
+ },
+ "default": {
+ "asgiref": {
+ "hashes": [
+ "sha256:5ee950735509d04eb673bd7f7120f8fa1c9e2df495394992c73234d526907e17",
+ "sha256:7162a3cb30ab0609f1a4c95938fd73e8604f63bdba516a7f7d64b83ff09478f0"
+ ],
+ "markers": "python_version >= '3.5'",
+ "version": "==3.3.1"
+ },
+ "beautifulsoup4": {
+ "hashes": [
+ "sha256:4c98143716ef1cb40bf7f39a8e3eec8f8b009509e74904ba3a7b315431577e35",
+ "sha256:84729e322ad1d5b4d25f805bfa05b902dd96450f43842c4e99067d5e1369eb25",
+ "sha256:fff47e031e34ec82bf17e00da8f592fe7de69aeea38be00523c04623c04fb666"
+ ],
+ "version": "==4.9.3"
+ },
+ "bs4": {
+ "hashes": [
+ "sha256:36ecea1fd7cc5c0c6e4a1ff075df26d50da647b75376626cc186e2212886dd3a"
+ ],
+ "index": "pypi",
+ "version": "==0.0.1"
+ },
+ "certifi": {
+ "hashes": [
+ "sha256:1a4995114262bffbc2413b159f2a1a480c969de6e6eb13ee966d470af86af59c",
+ "sha256:719a74fb9e33b9bd44cc7f3a8d94bc35e4049deebe19ba7d8e108280cfd59830"
+ ],
+ "version": "==2020.12.5"
+ },
+ "chardet": {
+ "hashes": [
+ "sha256:0d6f53a15db4120f2b08c94f11e7d93d2c911ee118b6b30a04ec3ee8310179fa",
+ "sha256:f864054d66fd9118f2e67044ac8981a54775ec5b67aed0441892edb553d21da5"
+ ],
+ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
+ "version": "==4.0.0"
+ },
+ "cssselect": {
+ "hashes": [
+ "sha256:f612ee47b749c877ebae5bb77035d8f4202c6ad0f0fc1271b3c18ad6c4468ecf",
+ "sha256:f95f8dedd925fd8f54edb3d2dfb44c190d9d18512377d3c1e2388d16126879bc"
+ ],
+ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
+ "version": "==1.1.0"
+ },
+ "django": {
+ "hashes": [
+ "sha256:2d78425ba74c7a1a74b196058b261b9733a8570782f4e2828974777ccca7edf7",
+ "sha256:efa2ab96b33b20c2182db93147a0c3cd7769d418926f9e9f140a60dca7c64ca9"
+ ],
+ "markers": "python_version >= '3.6'",
+ "version": "==3.1.5"
+ },
+ "fuckit": {
+ "hashes": [
+ "sha256:059488e6aa2053da9db5eb5101e2498f608314da5118bf2385acb864568ccc25"
+ ],
+ "index": "pypi",
+ "version": "==4.8.1"
+ },
+ "idna": {
+ "hashes": [
+ "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6",
+ "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0"
+ ],
+ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
+ "version": "==2.10"
+ },
+ "image": {
+ "hashes": [
+ "sha256:baa2e09178277daa50f22fd6d1d51ec78f19c12688921cb9ab5808743f097126"
+ ],
+ "index": "pypi",
+ "version": "==1.5.33"
+ },
+ "lazyxml": {
+ "editable": true,
+ "git": "https://github.com/waynedyck/lazyxml.git",
+ "ref": "f42ea4a4febf4c1e120b05d6ca9cef42556a75d5"
+ },
+ "lxml": {
+ "hashes": [
+ "sha256:0448576c148c129594d890265b1a83b9cd76fd1f0a6a04620753d9a6bcfd0a4d",
+ "sha256:127f76864468d6630e1b453d3ffbbd04b024c674f55cf0a30dc2595137892d37",
+ "sha256:1471cee35eba321827d7d53d104e7b8c593ea3ad376aa2df89533ce8e1b24a01",
+ "sha256:2363c35637d2d9d6f26f60a208819e7eafc4305ce39dc1d5005eccc4593331c2",
+ "sha256:2e5cc908fe43fe1aa299e58046ad66981131a66aea3129aac7770c37f590a644",
+ "sha256:2e6fd1b8acd005bd71e6c94f30c055594bbd0aa02ef51a22bbfa961ab63b2d75",
+ "sha256:366cb750140f221523fa062d641393092813b81e15d0e25d9f7c6025f910ee80",
+ "sha256:42ebca24ba2a21065fb546f3e6bd0c58c3fe9ac298f3a320147029a4850f51a2",
+ "sha256:4e751e77006da34643ab782e4a5cc21ea7b755551db202bc4d3a423b307db780",
+ "sha256:4fb85c447e288df535b17ebdebf0ec1cf3a3f1a8eba7e79169f4f37af43c6b98",
+ "sha256:50c348995b47b5a4e330362cf39fc503b4a43b14a91c34c83b955e1805c8e308",
+ "sha256:535332fe9d00c3cd455bd3dd7d4bacab86e2d564bdf7606079160fa6251caacf",
+ "sha256:535f067002b0fd1a4e5296a8f1bf88193080ff992a195e66964ef2a6cfec5388",
+ "sha256:5be4a2e212bb6aa045e37f7d48e3e1e4b6fd259882ed5a00786f82e8c37ce77d",
+ "sha256:60a20bfc3bd234d54d49c388950195d23a5583d4108e1a1d47c9eef8d8c042b3",
+ "sha256:648914abafe67f11be7d93c1a546068f8eff3c5fa938e1f94509e4a5d682b2d8",
+ "sha256:681d75e1a38a69f1e64ab82fe4b1ed3fd758717bed735fb9aeaa124143f051af",
+ "sha256:68a5d77e440df94011214b7db907ec8f19e439507a70c958f750c18d88f995d2",
+ "sha256:69a63f83e88138ab7642d8f61418cf3180a4d8cd13995df87725cb8b893e950e",
+ "sha256:6e4183800f16f3679076dfa8abf2db3083919d7e30764a069fb66b2b9eff9939",
+ "sha256:6fd8d5903c2e53f49e99359b063df27fdf7acb89a52b6a12494208bf61345a03",
+ "sha256:791394449e98243839fa822a637177dd42a95f4883ad3dec2a0ce6ac99fb0a9d",
+ "sha256:7a7669ff50f41225ca5d6ee0a1ec8413f3a0d8aa2b109f86d540887b7ec0d72a",
+ "sha256:7e9eac1e526386df7c70ef253b792a0a12dd86d833b1d329e038c7a235dfceb5",
+ "sha256:7ee8af0b9f7de635c61cdd5b8534b76c52cd03536f29f51151b377f76e214a1a",
+ "sha256:8246f30ca34dc712ab07e51dc34fea883c00b7ccb0e614651e49da2c49a30711",
+ "sha256:8c88b599e226994ad4db29d93bc149aa1aff3dc3a4355dd5757569ba78632bdf",
+ "sha256:923963e989ffbceaa210ac37afc9b906acebe945d2723e9679b643513837b089",
+ "sha256:94d55bd03d8671686e3f012577d9caa5421a07286dd351dfef64791cf7c6c505",
+ "sha256:97db258793d193c7b62d4e2586c6ed98d51086e93f9a3af2b2034af01450a74b",
+ "sha256:a9d6bc8642e2c67db33f1247a77c53476f3a166e09067c0474facb045756087f",
+ "sha256:cd11c7e8d21af997ee8079037fff88f16fda188a9776eb4b81c7e4c9c0a7d7fc",
+ "sha256:d8d3d4713f0c28bdc6c806a278d998546e8efc3498949e3ace6e117462ac0a5e",
+ "sha256:e0bfe9bb028974a481410432dbe1b182e8191d5d40382e5b8ff39cdd2e5c5931",
+ "sha256:f4822c0660c3754f1a41a655e37cb4dbbc9be3d35b125a37fab6f82d47674ebc",
+ "sha256:f83d281bb2a6217cd806f4cf0ddded436790e66f393e124dfe9731f6b3fb9afe",
+ "sha256:fc37870d6716b137e80d19241d0e2cff7a7643b925dfa49b4c8ebd1295eb506e"
+ ],
+ "index": "pypi",
+ "version": "==4.6.2"
+ },
+ "pillow": {
+ "hashes": [
+ "sha256:165c88bc9d8dba670110c689e3cc5c71dbe4bfb984ffa7cbebf1fac9554071d6",
+ "sha256:1d208e670abfeb41b6143537a681299ef86e92d2a3dac299d3cd6830d5c7bded",
+ "sha256:22d070ca2e60c99929ef274cfced04294d2368193e935c5d6febfd8b601bf865",
+ "sha256:2353834b2c49b95e1313fb34edf18fca4d57446675d05298bb694bca4b194174",
+ "sha256:39725acf2d2e9c17356e6835dccebe7a697db55f25a09207e38b835d5e1bc032",
+ "sha256:3de6b2ee4f78c6b3d89d184ade5d8fa68af0848f9b6b6da2b9ab7943ec46971a",
+ "sha256:47c0d93ee9c8b181f353dbead6530b26980fe4f5485aa18be8f1fd3c3cbc685e",
+ "sha256:5e2fe3bb2363b862671eba632537cd3a823847db4d98be95690b7e382f3d6378",
+ "sha256:604815c55fd92e735f9738f65dabf4edc3e79f88541c221d292faec1904a4b17",
+ "sha256:6c5275bd82711cd3dcd0af8ce0bb99113ae8911fc2952805f1d012de7d600a4c",
+ "sha256:731ca5aabe9085160cf68b2dbef95fc1991015bc0a3a6ea46a371ab88f3d0913",
+ "sha256:7612520e5e1a371d77e1d1ca3a3ee6227eef00d0a9cddb4ef7ecb0b7396eddf7",
+ "sha256:7916cbc94f1c6b1301ac04510d0881b9e9feb20ae34094d3615a8a7c3db0dcc0",
+ "sha256:81c3fa9a75d9f1afafdb916d5995633f319db09bd773cb56b8e39f1e98d90820",
+ "sha256:887668e792b7edbfb1d3c9d8b5d8c859269a0f0eba4dda562adb95500f60dbba",
+ "sha256:93a473b53cc6e0b3ce6bf51b1b95b7b1e7e6084be3a07e40f79b42e83503fbf2",
+ "sha256:96d4dc103d1a0fa6d47c6c55a47de5f5dafd5ef0114fa10c85a1fd8e0216284b",
+ "sha256:a3d3e086474ef12ef13d42e5f9b7bbf09d39cf6bd4940f982263d6954b13f6a9",
+ "sha256:b02a0b9f332086657852b1f7cb380f6a42403a6d9c42a4c34a561aa4530d5234",
+ "sha256:b09e10ec453de97f9a23a5aa5e30b334195e8d2ddd1ce76cc32e52ba63c8b31d",
+ "sha256:b6f00ad5ebe846cc91763b1d0c6d30a8042e02b2316e27b05de04fa6ec831ec5",
+ "sha256:bba80df38cfc17f490ec651c73bb37cd896bc2400cfba27d078c2135223c1206",
+ "sha256:c3d911614b008e8a576b8e5303e3db29224b455d3d66d1b2848ba6ca83f9ece9",
+ "sha256:ca20739e303254287138234485579b28cb0d524401f83d5129b5ff9d606cb0a8",
+ "sha256:cb192176b477d49b0a327b2a5a4979552b7a58cd42037034316b8018ac3ebb59",
+ "sha256:cdbbe7dff4a677fb555a54f9bc0450f2a21a93c5ba2b44e09e54fcb72d2bd13d",
+ "sha256:cf6e33d92b1526190a1de904df21663c46a456758c0424e4f947ae9aa6088bf7",
+ "sha256:d355502dce85ade85a2511b40b4c61a128902f246504f7de29bbeec1ae27933a",
+ "sha256:d673c4990acd016229a5c1c4ee8a9e6d8f481b27ade5fc3d95938697fa443ce0",
+ "sha256:dc577f4cfdda354db3ae37a572428a90ffdbe4e51eda7849bf442fb803f09c9b",
+ "sha256:dd9eef866c70d2cbbea1ae58134eaffda0d4bfea403025f4db6859724b18ab3d",
+ "sha256:f50e7a98b0453f39000619d845be8b06e611e56ee6e8186f7f60c3b1e2f0feae"
+ ],
+ "markers": "python_version >= '3.6'",
+ "version": "==8.1.0"
+ },
+ "pyquery": {
+ "hashes": [
+ "sha256:1fc33b7699455ed25c75282bc8f80ace1ac078b0dda5a933dacbd8b1c1f83963",
+ "sha256:a388eefb6bc4a55350de0316fbd97cda999ae669b6743ae5b99102ba54f5aa72"
+ ],
+ "index": "pypi",
+ "version": "==1.4.3"
+ },
+ "pytz": {
+ "hashes": [
+ "sha256:16962c5fb8db4a8f63a26646d8886e9d769b6c511543557bc84e9569fb9a9cb4",
+ "sha256:180befebb1927b16f6b57101720075a984c019ac16b1b7575673bea42c6c3da5"
+ ],
+ "version": "==2020.5"
+ },
+ "requests": {
+ "hashes": [
+ "sha256:27973dd4a904a4f13b263a19c866c13b92a39ed1c964655f025f3f8d3d75b804",
+ "sha256:c210084e36a42ae6b9219e00e48287def368a26d03a048ddad7bfee44f75871e"
+ ],
+ "index": "pypi",
+ "version": "==2.25.1"
+ },
+ "six": {
+ "hashes": [
+ "sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259",
+ "sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced"
+ ],
+ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
+ "version": "==1.15.0"
+ },
+ "soupsieve": {
+ "hashes": [
+ "sha256:4bb21a6ee4707bf43b61230e80740e71bfe56e55d1f1f50924b087bb2975c851",
+ "sha256:6dc52924dc0bc710a5d16794e6b3480b2c7c08b07729505feab2b2c16661ff6e"
+ ],
+ "markers": "python_version >= '3.0'",
+ "version": "==2.1"
+ },
+ "sqlparse": {
+ "hashes": [
+ "sha256:017cde379adbd6a1f15a61873f43e8274179378e95ef3fede90b5aa64d304ed0",
+ "sha256:0f91fd2e829c44362cbcfab3e9ae12e22badaa8a29ad5ff599f9ec109f0454e8"
+ ],
+ "markers": "python_version >= '3.5'",
+ "version": "==0.4.1"
+ },
+ "tenacity": {
+ "hashes": [
+ "sha256:baed357d9f35ec64264d8a4bbf004c35058fad8795c5b0d8a7dc77ecdcbb8f39",
+ "sha256:e14d191fb0a309b563904bbc336582efe2037de437e543b38da749769b544d7f"
+ ],
+ "index": "pypi",
+ "version": "==6.3.1"
+ },
+ "urllib3": {
+ "hashes": [
+ "sha256:19188f96923873c92ccb987120ec4acaa12f0461fa9ce5d3d0772bc965a39e08",
+ "sha256:d8ff90d979214d7b4f8ce956e80f4028fc6860e4431f731ea4a8c08f23f99473"
+ ],
+ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'",
+ "version": "==1.26.2"
+ }
+ },
+ "develop": {}
+}
diff --git a/README.md b/README.md
old mode 100644
new mode 100755
diff --git a/avsox.py b/SiteSource/avsox.py
old mode 100644
new mode 100755
similarity index 96%
rename from avsox.py
rename to SiteSource/avsox.py
index 67ee9bf..87ae401
--- a/avsox.py
+++ b/SiteSource/avsox.py
@@ -1,115 +1,116 @@
-import re
-from lxml import etree
-import json
-from bs4 import BeautifulSoup
-from ADC_function import *
-# import sys
-# import io
-# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
-
-def getActorPhoto(htmlcode): #//*[@id="star_qdt"]/li/a/img
- soup = BeautifulSoup(htmlcode, 'lxml')
- a = soup.find_all(attrs={'class': 'avatar-box'})
- d = {}
- for i in a:
- l = i.img['src']
- t = i.span.get_text()
- p2 = {t: l}
- d.update(p2)
- return d
-def getTitle(a):
- try:
- html = etree.fromstring(a, etree.HTMLParser())
- result = str(html.xpath('/html/body/div[2]/h3/text()')).strip(" ['']") #[0]
- return result.replace('/', '')
- except:
- return ''
-def getActor(a): #//*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
- soup = BeautifulSoup(a, 'lxml')
- a = soup.find_all(attrs={'class': 'avatar-box'})
- d = []
- for i in a:
- d.append(i.span.get_text())
- return d
-def getStudio(a):
- html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- result1 = str(html.xpath('//p[contains(text(),"制作商: ")]/following-sibling::p[1]/a/text()')).strip(" ['']").replace("', '",' ')
- return result1
-def getRuntime(a):
- html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- result1 = str(html.xpath('//span[contains(text(),"长度:")]/../text()')).strip(" ['分钟']")
- return result1
-def getLabel(a):
- html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- result1 = str(html.xpath('//p[contains(text(),"系列:")]/following-sibling::p[1]/a/text()')).strip(" ['']")
- return result1
-def getNum(a):
- html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- result1 = str(html.xpath('//span[contains(text(),"识别码:")]/../span[2]/text()')).strip(" ['']")
- return result1
-def getYear(release):
- try:
- result = str(re.search('\d{4}',release).group())
- return result
- except:
- return release
-def getRelease(a):
- html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- result1 = str(html.xpath('//span[contains(text(),"发行时间:")]/../text()')).strip(" ['']")
- return result1
-def getCover(htmlcode):
- html = etree.fromstring(htmlcode, etree.HTMLParser())
- result = str(html.xpath('/html/body/div[2]/div[1]/div[1]/a/img/@src')).strip(" ['']")
- return result
-def getCover_small(htmlcode):
- html = etree.fromstring(htmlcode, etree.HTMLParser())
- result = str(html.xpath('//*[@id="waterfall"]/div/a/div[1]/img/@src')).strip(" ['']")
- return result
-def getTag(a): # 获取演员
- soup = BeautifulSoup(a, 'lxml')
- a = soup.find_all(attrs={'class': 'genre'})
- d = []
- for i in a:
- d.append(i.get_text())
- return d
-
-def main(number):
- a = get_html('https://avsox.host/cn/search/' + number)
- html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
- if result1 == '' or result1 == 'null' or result1 == 'None':
- a = get_html('https://avsox.host/cn/search/' + number.replace('-', '_'))
- print(a)
- html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
- if result1 == '' or result1 == 'null' or result1 == 'None':
- a = get_html('https://avsox.host/cn/search/' + number.replace('_', ''))
- print(a)
- html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
- web = get_html(result1)
- soup = BeautifulSoup(web, 'lxml')
- info = str(soup.find(attrs={'class': 'row movie'}))
- dic = {
- 'actor': getActor(web),
- 'title': getTitle(web).strip(getNum(web)),
- 'studio': getStudio(info),
- 'outline': '',#
- 'runtime': getRuntime(info),
- 'director': '', #
- 'release': getRelease(info),
- 'number': getNum(info),
- 'cover': getCover(web),
- 'cover_small': getCover_small(a),
- 'imagecut': 3,
- 'tag': getTag(web),
- 'label': getLabel(info),
- 'year': getYear(getRelease(info)), # str(re.search('\d{4}',getRelease(a)).group()),
- 'actor_photo': getActorPhoto(web),
- 'website': result1,
- 'source': 'avsox.py',
- }
- js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
- return js
-
+import re
+from lxml import etree
+import json
+from bs4 import BeautifulSoup
+from ADC_function import *
+# import sys
+# import io
+# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
+
+def getActorPhoto(htmlcode): #//*[@id="star_qdt"]/li/a/img
+ soup = BeautifulSoup(htmlcode, 'lxml')
+ a = soup.find_all(attrs={'class': 'avatar-box'})
+ d = {}
+ for i in a:
+ l = i.img['src']
+ t = i.span.get_text()
+ p2 = {t: l}
+ d.update(p2)
+ return d
+def getTitle(a):
+ try:
+ html = etree.fromstring(a, etree.HTMLParser())
+ result = str(html.xpath('/html/body/div[2]/h3/text()')).strip(" ['']") #[0]
+ return result.replace('/', '')
+ except:
+ return ''
+def getActor(a): #//*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
+ soup = BeautifulSoup(a, 'lxml')
+ a = soup.find_all(attrs={'class': 'avatar-box'})
+ d = []
+ for i in a:
+ d.append(i.span.get_text())
+ return d
+def getStudio(a):
+ html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+ result1 = str(html.xpath('//p[contains(text(),"制作商: ")]/following-sibling::p[1]/a/text()')).strip(" ['']").replace("', '",' ')
+ return result1
+def getRuntime(a):
+ html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+ result1 = str(html.xpath('//span[contains(text(),"长度:")]/../text()')).strip(" ['分钟']")
+ return result1
+def getLabel(a):
+ html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+ result1 = str(html.xpath('//p[contains(text(),"系列:")]/following-sibling::p[1]/a/text()')).strip(" ['']")
+ return result1
+def getNum(a):
+ html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+ result1 = str(html.xpath('//span[contains(text(),"识别码:")]/../span[2]/text()')).strip(" ['']")
+ return result1
+def getYear(release):
+ try:
+ result = str(re.search('\d{4}',release).group())
+ return result
+ except:
+ return release
+def getRelease(a):
+ html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+ result1 = str(html.xpath('//span[contains(text(),"发行时间:")]/../text()')).strip(" ['']")
+ return result1
+def getCover(htmlcode):
+ html = etree.fromstring(htmlcode, etree.HTMLParser())
+ result = str(html.xpath('/html/body/div[2]/div[1]/div[1]/a/img/@src')).strip(" ['']")
+ return result
+def getCover_small(htmlcode):
+ html = etree.fromstring(htmlcode, etree.HTMLParser())
+ result = str(html.xpath('//*[@id="waterfall"]/div/a/div[1]/img/@src')).strip(" ['']")
+ return result
+def getTag(a):  # collect the genre tags
+ soup = BeautifulSoup(a, 'lxml')
+ a = soup.find_all(attrs={'class': 'genre'})
+ d = []
+ for i in a:
+ d.append(i.get_text())
+ return d
+
+def main(number):
+ url = 'https://avsox.host/cn/search/' + number
+ a = get_html(url)
+ html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+ result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
+ if result1 == '' or result1 == 'null' or result1 == 'None':
+ a = get_html('https://avsox.host/cn/search/' + number.replace('-', '_'))
+ print(a)
+ html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+ result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
+ if result1 == '' or result1 == 'null' or result1 == 'None':
+ a = get_html('https://avsox.host/cn/search/' + number.replace('_', ''))
+ print(a)
+ html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+ result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
+ web = get_html(result1)
+ soup = BeautifulSoup(web, 'lxml')
+ info = str(soup.find(attrs={'class': 'row movie'}))
+ dic = {
+ 'actor': getActor(web),
+ 'title': getTitle(web).strip(getNum(web)),
+ 'studio': getStudio(info),
+ 'outline': '',#
+ 'runtime': getRuntime(info),
+ 'director': '', #
+ 'release': getRelease(info),
+ 'number': getNum(info),
+ 'cover': getCover(web),
+ 'cover_small': getCover_small(a),
+ 'imagecut': 3,
+ 'tag': getTag(web),
+ 'label': getLabel(info),
+ 'year': getYear(getRelease(info)), # str(re.search('\d{4}',getRelease(a)).group()),
+ 'actor_photo': getActorPhoto(web),
+ 'website': result1,
+ 'source': 'avsox.py',
+ }
+ js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
+ return js
+
#print(main('012717_472'))
\ No newline at end of file
diff --git a/fanza.py b/SiteSource/fanza.py
old mode 100644
new mode 100755
similarity index 97%
rename from fanza.py
rename to SiteSource/fanza.py
index 87c8be0..72632dc
--- a/fanza.py
+++ b/SiteSource/fanza.py
@@ -1,229 +1,229 @@
-#!/usr/bin/python3
-# -*- coding: utf-8 -*-
-import json
-import re
-
-from lxml import etree
-
-from ADC_function import *
-
-# import sys
-# import io
-# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
-
-
-def getTitle(text):
- html = etree.fromstring(text, etree.HTMLParser())
- result = html.xpath('//*[@id="title"]/text()')[0]
- return result
-
-
-def getActor(text):
- # //*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
- html = etree.fromstring(text, etree.HTMLParser())
- result = (
- str(
- html.xpath(
- "//td[contains(text(),'出演者')]/following-sibling::td/span/a/text()"
- )
- )
- .strip(" ['']")
- .replace("', '", ",")
- )
- return result
-
-
-def getStudio(text):
- html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- try:
- result = html.xpath(
- "//td[contains(text(),'メーカー')]/following-sibling::td/a/text()"
- )[0]
- except:
- result = html.xpath(
- "//td[contains(text(),'メーカー')]/following-sibling::td/text()"
- )[0]
- return result
-
-
-def getRuntime(text):
- html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- result = html.xpath("//td[contains(text(),'収録時間')]/following-sibling::td/text()")[0]
- return re.search(r"\d+", str(result)).group()
-
-
-def getLabel(text):
- html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- try:
- result = html.xpath(
- "//td[contains(text(),'シリーズ:')]/following-sibling::td/a/text()"
- )[0]
- except:
- result = html.xpath(
- "//td[contains(text(),'シリーズ:')]/following-sibling::td/text()"
- )[0]
- return result
-
-
-def getNum(text):
- html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- try:
- result = html.xpath(
- "//td[contains(text(),'品番:')]/following-sibling::td/a/text()"
- )[0]
- except:
- result = html.xpath(
- "//td[contains(text(),'品番:')]/following-sibling::td/text()"
- )[0]
- return result
-
-
-def getYear(getRelease):
- try:
- result = str(re.search(r"\d{4}", getRelease).group())
- return result
- except:
- return getRelease
-
-
-def getRelease(text):
- html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- try:
- result = html.xpath(
- "//td[contains(text(),'発売日:')]/following-sibling::td/a/text()"
- )[0].lstrip("\n")
- except:
- result = html.xpath(
- "//td[contains(text(),'発売日:')]/following-sibling::td/text()"
- )[0].lstrip("\n")
- return result
-
-
-def getTag(text):
- html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- try:
- result = html.xpath(
- "//td[contains(text(),'ジャンル:')]/following-sibling::td/a/text()"
- )
- except:
- result = html.xpath(
- "//td[contains(text(),'ジャンル:')]/following-sibling::td/text()"
- )
- return result
-
-
-def getCover(text, number):
- html = etree.fromstring(text, etree.HTMLParser())
- cover_number = number
- try:
- result = html.xpath('//*[@id="' + cover_number + '"]/@href')[0]
- except:
- # sometimes fanza modify _ to \u0005f for image id
- if "_" in cover_number:
- cover_number = cover_number.replace("_", r"\u005f")
- try:
- result = html.xpath('//*[@id="' + cover_number + '"]/@href')[0]
- except:
- # (TODO) handle more edge case
- # print(html)
- # raise exception here, same behavior as before
- # people's major requirement is fetching the picture
- raise ValueError("can not find image")
- return result
-
-
-def getDirector(text):
- html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- try:
- result = html.xpath(
- "//td[contains(text(),'監督:')]/following-sibling::td/a/text()"
- )[0]
- except:
- result = html.xpath(
- "//td[contains(text(),'監督:')]/following-sibling::td/text()"
- )[0]
- return result
-
-
-def getOutline(text):
- html = etree.fromstring(text, etree.HTMLParser())
- try:
- result = str(html.xpath("//div[@class='mg-b20 lh4']/text()")[0]).replace(
- "\n", ""
- )
- if result == "":
- result = str(html.xpath("//div[@class='mg-b20 lh4']//p/text()")[0]).replace(
- "\n", ""
- )
- except:
- # (TODO) handle more edge case
- # print(html)
- return ""
- return result
-
-
-def main(number):
- # fanza allow letter + number + underscore, normalize the input here
- # @note: I only find the usage of underscore as h_test123456789
- fanza_search_number = number
- # AV_Data_Capture.py.getNumber() over format the input, restore the h_ prefix
- if fanza_search_number.startswith("h-"):
- fanza_search_number = fanza_search_number.replace("h-", "h_")
-
- fanza_search_number = re.sub(r"[^0-9a-zA-Z_]", "", fanza_search_number).lower()
-
- fanza_urls = [
- "https://www.dmm.co.jp/digital/videoa/-/detail/=/cid=",
- "https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=",
- "https://www.dmm.co.jp/digital/anime/-/detail/=/cid=",
- "https://www.dmm.co.jp/mono/anime/-/detail/=/cid=",
- ]
- chosen_url = ""
- for url in fanza_urls:
- chosen_url = url + fanza_search_number
- htmlcode = get_html(chosen_url)
- if "404 Not Found" not in htmlcode:
- break
- if "404 Not Found" in htmlcode:
- return json.dumps({"title": "",})
- try:
- # for some old page, the input number does not match the page
- # for example, the url will be cid=test012
- # but the hinban on the page is test00012
- # so get the hinban first, and then pass it to following functions
- fanza_hinban = getNum(htmlcode)
- data = {
- "title": getTitle(htmlcode).strip(getActor(htmlcode)),
- "studio": getStudio(htmlcode),
- "outline": getOutline(htmlcode),
- "runtime": getRuntime(htmlcode),
- "director": getDirector(htmlcode) if "anime" not in chosen_url else "",
- "actor": getActor(htmlcode) if "anime" not in chosen_url else "",
- "release": getRelease(htmlcode),
- "number": fanza_hinban,
- "cover": getCover(htmlcode, fanza_hinban),
- "imagecut": 1,
- "tag": getTag(htmlcode),
- "label": getLabel(htmlcode),
- "year": getYear(
- getRelease(htmlcode)
- ), # str(re.search('\d{4}',getRelease(a)).group()),
- "actor_photo": "",
- "website": chosen_url,
- "source": "fanza.py",
- }
- except:
- data = {
- "title": "",
- }
- js = json.dumps(
- data, ensure_ascii=False, sort_keys=True, indent=4, separators=(",", ":")
- ) # .encode('UTF-8')
- return js
-
-
-if __name__ == "__main__":
- # print(main("DV-1562"))
- # input("[+][+]Press enter key exit, you can check the error messge before you exit.\n[+][+]按回车键结束,你可以在结束之前查看和错误信息。")
- # print(main("ipx292"))
- pass
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+import json
+import re
+
+from lxml import etree
+
+from ADC_function import *
+
+# import sys
+# import io
+# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
+
+
+def getTitle(text):
+ html = etree.fromstring(text, etree.HTMLParser())
+ result = html.xpath('//*[@id="title"]/text()')[0]
+ return result
+
+
+def getActor(text):
+ # //*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
+ html = etree.fromstring(text, etree.HTMLParser())
+ result = (
+ str(
+ html.xpath(
+ "//td[contains(text(),'出演者')]/following-sibling::td/span/a/text()"
+ )
+ )
+ .strip(" ['']")
+ .replace("', '", ",")
+ )
+ return result
+
+
+def getStudio(text):
+ html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+ try:
+ result = html.xpath(
+ "//td[contains(text(),'メーカー')]/following-sibling::td/a/text()"
+ )[0]
+ except:
+ result = html.xpath(
+ "//td[contains(text(),'メーカー')]/following-sibling::td/text()"
+ )[0]
+ return result
+
+
+def getRuntime(text):
+ html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+ result = html.xpath("//td[contains(text(),'収録時間')]/following-sibling::td/text()")[0]
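+    # the cell text looks like e.g. "120分"; keep only the digits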
+ return re.search(r"\d+", str(result)).group()
+
+
+def getLabel(text):
+ html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+ try:
+ result = html.xpath(
+ "//td[contains(text(),'シリーズ:')]/following-sibling::td/a/text()"
+ )[0]
+ except:
+ result = html.xpath(
+ "//td[contains(text(),'シリーズ:')]/following-sibling::td/text()"
+ )[0]
+ return result
+
+
+def getNum(text):
+ html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+ try:
+ result = html.xpath(
+ "//td[contains(text(),'品番:')]/following-sibling::td/a/text()"
+ )[0]
+ except:
+ result = html.xpath(
+ "//td[contains(text(),'品番:')]/following-sibling::td/text()"
+ )[0]
+ return result
+
+
+def getYear(getRelease):
+ try:
+ result = str(re.search(r"\d{4}", getRelease).group())
+ return result
+ except:
+ return getRelease
+
+
+def getRelease(text):
+ html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+ try:
+ result = html.xpath(
+ "//td[contains(text(),'発売日:')]/following-sibling::td/a/text()"
+ )[0].lstrip("\n")
+ except:
+ result = html.xpath(
+ "//td[contains(text(),'発売日:')]/following-sibling::td/text()"
+ )[0].lstrip("\n")
+ return result
+
+
+def getTag(text):
+ html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+ try:
+ result = html.xpath(
+ "//td[contains(text(),'ジャンル:')]/following-sibling::td/a/text()"
+ )
+ except:
+ result = html.xpath(
+ "//td[contains(text(),'ジャンル:')]/following-sibling::td/text()"
+ )
+ return result
+
+
+def getCover(text, number):
+ html = etree.fromstring(text, etree.HTMLParser())
+ cover_number = number
+ try:
+ result = html.xpath('//*[@id="' + cover_number + '"]/@href')[0]
+ except:
+        # sometimes fanza rewrites _ as \u005f in the image id
+ if "_" in cover_number:
+ cover_number = cover_number.replace("_", r"\u005f")
+ try:
+ result = html.xpath('//*[@id="' + cover_number + '"]/@href')[0]
+ except:
+            # (TODO) handle more edge cases
+            # print(html)
+            # raise here, same behavior as before;
+            # fetching the picture is the main requirement
+ raise ValueError("can not find image")
+ return result
+
+
+def getDirector(text):
+ html = etree.fromstring(text, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+ try:
+ result = html.xpath(
+ "//td[contains(text(),'監督:')]/following-sibling::td/a/text()"
+ )[0]
+ except:
+ result = html.xpath(
+ "//td[contains(text(),'監督:')]/following-sibling::td/text()"
+ )[0]
+ return result
+
+
+def getOutline(text):
+ html = etree.fromstring(text, etree.HTMLParser())
+ try:
+ result = str(html.xpath("//div[@class='mg-b20 lh4']/text()")[0]).replace(
+ "\n", ""
+ )
+ if result == "":
+ result = str(html.xpath("//div[@class='mg-b20 lh4']//p/text()")[0]).replace(
+ "\n", ""
+ )
+ except:
+        # (TODO) handle more edge cases
+ # print(html)
+ return ""
+ return result
+
+
+def main(number):
+    # fanza allows letters + digits + underscore; normalize the input here
+    # @note: the only underscore usage observed so far is the h_ prefix, e.g. h_test123456789
+    fanza_search_number = number
+    # AV_Data_Capture.py's getNumber() over-formats the input; restore the h_ prefix
+ if fanza_search_number.startswith("h-"):
+ fanza_search_number = fanza_search_number.replace("h-", "h_")
+
+ fanza_search_number = re.sub(r"[^0-9a-zA-Z_]", "", fanza_search_number).lower()
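+    # illustrative: "ABP-123" -> "abp123"; "h-test00123" -> "h_test00123"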
+
+ fanza_urls = [
+ "https://www.dmm.co.jp/digital/videoa/-/detail/=/cid=",
+ "https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=",
+ "https://www.dmm.co.jp/digital/anime/-/detail/=/cid=",
+ "https://www.dmm.co.jp/mono/anime/-/detail/=/cid=",
+ ]
+ chosen_url = ""
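+    # probe each storefront in turn and keep the first URL that returns a real
+    # page; if every candidate 404s, fall through to the empty-title result below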
+ for url in fanza_urls:
+ chosen_url = url + fanza_search_number
+ htmlcode = get_html(chosen_url)
+ if "404 Not Found" not in htmlcode:
+ break
+ if "404 Not Found" in htmlcode:
+ return json.dumps({"title": "",})
+ try:
+        # for some old pages the input number does not match the page:
+        # for example, the url will be cid=test012
+        # but the hinban on the page is test00012,
+        # so get the hinban first and pass it to the following functions
+ fanza_hinban = getNum(htmlcode)
+ data = {
+ "title": getTitle(htmlcode).strip(getActor(htmlcode)),
+ "studio": getStudio(htmlcode),
+ "outline": getOutline(htmlcode),
+ "runtime": getRuntime(htmlcode),
+ "director": getDirector(htmlcode) if "anime" not in chosen_url else "",
+ "actor": getActor(htmlcode) if "anime" not in chosen_url else "",
+ "release": getRelease(htmlcode),
+ "number": fanza_hinban,
+ "cover": getCover(htmlcode, fanza_hinban),
+ "imagecut": 1,
+ "tag": getTag(htmlcode),
+ "label": getLabel(htmlcode),
+ "year": getYear(
+ getRelease(htmlcode)
+ ), # str(re.search('\d{4}',getRelease(a)).group()),
+ "actor_photo": "",
+ "website": chosen_url,
+ "source": "fanza.py",
+ }
+ except:
+ data = {
+ "title": "",
+ }
+ js = json.dumps(
+ data, ensure_ascii=False, sort_keys=True, indent=4, separators=(",", ":")
+ ) # .encode('UTF-8')
+ return js
+
+
+if __name__ == "__main__":
+ # print(main("DV-1562"))
+    # input("[+][+]Press Enter to exit; you can review the error message before exiting.")
+ # print(main("ipx292"))
+ pass
diff --git a/fc2fans_club.py b/SiteSource/fc2fans_club.py
similarity index 97%
rename from fc2fans_club.py
rename to SiteSource/fc2fans_club.py
index 3215e49..9dfeb24 100755
--- a/fc2fans_club.py
+++ b/SiteSource/fc2fans_club.py
@@ -1,162 +1,162 @@
-import re
-from lxml import etree#need install
-import json
-import ADC_function
-# import sys
-# import io
-# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
-
-def getTitle(htmlcode): #获取厂商
- #print(htmlcode)
- html = etree.fromstring(htmlcode,etree.HTMLParser())
- result = str(html.xpath('/html/body/div[2]/div/div[1]/h3/text()')).strip(" ['']")
- result2 = str(re.sub('\D{2}2-\d+','',result)).replace(' ','',1)
- #print(result2)
- return result2
-def getActor(htmlcode):
- try:
- html = etree.fromstring(htmlcode, etree.HTMLParser())
- result = str(html.xpath('/html/body/div[2]/div/div[1]/h5[5]/a/text()')).strip(" ['']")
- return result
- except:
- return ''
-def getStudio(htmlcode): #获取厂商
- html = etree.fromstring(htmlcode,etree.HTMLParser())
- result = str(html.xpath('/html/body/div[2]/div/div[1]/h5[3]/a[1]/text()')).strip(" ['']")
- return result
-def getNum(htmlcode): #获取番号
- html = etree.fromstring(htmlcode, etree.HTMLParser())
- result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[1]/span[2]/text()')).strip(" ['']")
- #print(result)
- return result
-def getRelease(htmlcode2): #
- #a=ADC_function.get_html('http://adult.contents.fc2.com/article_search.php?id='+str(number).lstrip("FC2-").lstrip("fc2-").lstrip("fc2_").lstrip("fc2-")+'&utm_source=aff_php&utm_medium=source_code&utm_campaign=from_aff_php')
- html=etree.fromstring(htmlcode2,etree.HTMLParser())
- result = str(html.xpath('//*[@id="container"]/div[1]/div/article/section[1]/div/div[2]/dl/dd[4]/text()')).strip(" ['']")
- return result
-def getCover(htmlcode,number,htmlcode2): #获取厂商 #
- #a = ADC_function.get_html('http://adult.contents.fc2.com/article_search.php?id=' + str(number).lstrip("FC2-").lstrip("fc2-").lstrip("fc2_").lstrip("fc2-") + '&utm_source=aff_php&utm_medium=source_code&utm_campaign=from_aff_php')
- html = etree.fromstring(htmlcode2, etree.HTMLParser())
- result = str(html.xpath('//*[@id="container"]/div[1]/div/article/section[1]/div/div[1]/a/img/@src')).strip(" ['']")
- if result == '':
- html = etree.fromstring(htmlcode, etree.HTMLParser())
- result2 = str(html.xpath('//*[@id="slider"]/ul[1]/li[1]/img/@src')).strip(" ['']")
- return 'https://fc2club.com' + result2
- return 'http:' + result
-def getOutline(htmlcode2): #获取番号 #
- html = etree.fromstring(htmlcode2, etree.HTMLParser())
- result = str(html.xpath('/html/body/div[1]/div[2]/div[2]/div[1]/div/article/section[4]/p/text()')).strip(" ['']").replace("\\n",'',10000).replace("'",'',10000).replace(', ,','').strip(' ').replace('。,',',')
- return result
-def getTag(htmlcode): #获取番号
- html = etree.fromstring(htmlcode, etree.HTMLParser())
- result = str(html.xpath('/html/body/div[2]/div/div[1]/h5[4]/a/text()'))
- return result.strip(" ['']").replace("'",'').replace(' ','')
-def getYear(release):
- try:
- result = re.search('\d{4}',release).group()
- return result
- except:
- return ''
-
-def getTitle_fc2com(htmlcode): #获取厂商
- html = etree.fromstring(htmlcode,etree.HTMLParser())
- result = html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/h3/text()')[0]
- return result
-def getActor_fc2com(htmlcode):
- try:
- html = etree.fromstring(htmlcode, etree.HTMLParser())
- result = html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/ul/li[3]/a/text()')[0]
- return result
- except:
- return ''
-def getStudio_fc2com(htmlcode): #获取厂商
- try:
- html = etree.fromstring(htmlcode, etree.HTMLParser())
- result = str(html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/ul/li[3]/a/text()')).strip(" ['']")
- return result
- except:
- return ''
-def getNum_fc2com(htmlcode): #获取番号
- html = etree.fromstring(htmlcode, etree.HTMLParser())
- result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[1]/span[2]/text()')).strip(" ['']")
- return result
-def getRelease_fc2com(htmlcode2): #
- html=etree.fromstring(htmlcode2,etree.HTMLParser())
- result = str(html.xpath('//*[@id="container"]/div[1]/div/article/section[1]/div/div[2]/dl/dd[4]/text()')).strip(" ['']")
- return result
-def getCover_fc2com(htmlcode2): #获取厂商 #
- html = etree.fromstring(htmlcode2, etree.HTMLParser())
- result = str(html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[1]/span/img/@src')).strip(" ['']")
- return 'http:' + result
-def getOutline_fc2com(htmlcode2): #获取番号 #
- html = etree.fromstring(htmlcode2, etree.HTMLParser())
- result = str(html.xpath('/html/body/div/text()')).strip(" ['']").replace("\\n",'',10000).replace("'",'',10000).replace(', ,','').strip(' ').replace('。,',',')
- return result
-def getTag_fc2com(number): #获取番号
- htmlcode = str(bytes(ADC_function.get_html('http://adult.contents.fc2.com/api/v4/article/'+number+'/tag?'),'utf-8').decode('unicode-escape'))
- result = re.findall('"tag":"(.*?)"', htmlcode)
- return result
-def getYear_fc2com(release):
- try:
- result = re.search('\d{4}',release).group()
- return result
- except:
- return ''
-
-def main(number):
- try:
- htmlcode2 = ADC_function.get_html('https://adult.contents.fc2.com/article/'+number+'/')
- htmlcode = ADC_function.get_html('https://fc2club.com//html/FC2-' + number + '.html')
- actor = getActor(htmlcode)
- if getActor(htmlcode) == '':
- actor = 'FC2系列'
- dic = {
- 'title': getTitle(htmlcode),
- 'studio': getStudio(htmlcode),
- 'year': '',#str(re.search('\d{4}',getRelease(number)).group()),
- 'outline': '',#getOutline(htmlcode2),
- 'runtime': getYear(getRelease(htmlcode)),
- 'director': getStudio(htmlcode),
- 'actor': actor,
- 'release': getRelease(number),
- 'number': 'FC2-'+number,
- 'label': '',
- 'cover': getCover(htmlcode,number,htmlcode2),
- 'imagecut': 0,
- 'tag': getTag(htmlcode),
- 'actor_photo':'',
- 'website': 'https://fc2club.com//html/FC2-' + number + '.html',
- 'source':'https://fc2club.com//html/FC2-' + number + '.html',
- }
- if dic['title'] == '':
- htmlcode2 = ADC_function.get_html('https://adult.contents.fc2.com/article/' + number + '/',cookies={'wei6H':'1'})
- actor = getActor(htmlcode)
- if getActor(htmlcode) == '':
- actor = 'FC2系列'
- dic = {
- 'title': getTitle_fc2com(htmlcode2),
- 'studio': getStudio_fc2com(htmlcode2),
- 'year': '', # str(re.search('\d{4}',getRelease(number)).group()),
- 'outline': getOutline_fc2com(htmlcode2),
- 'runtime': getYear_fc2com(getRelease(htmlcode2)),
- 'director': getStudio_fc2com(htmlcode2),
- 'actor': actor,
- 'release': getRelease_fc2com(number),
- 'number': 'FC2-' + number,
- 'cover': getCover_fc2com(htmlcode2),
- 'imagecut': 0,
- 'tag': getTag_fc2com(number),
- 'label': '',
- 'actor_photo': '',
- 'website': 'http://adult.contents.fc2.com/article/' + number + '/',
- 'source': 'http://adult.contents.fc2.com/article/' + number + '/',
- }
- except Exception as e:
- # (TODO) better handle this
- # print(e)
- dic = {"title": ""}
- js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'),)#.encode('UTF-8')
- return js
-
-
-#print(main('1252953'))
+import re
+from lxml import etree#need install
+import json
+import ADC_function
+# import sys
+# import io
+# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
+
+def getTitle(htmlcode):  # fetch the title
+ #print(htmlcode)
+ html = etree.fromstring(htmlcode,etree.HTMLParser())
+ result = str(html.xpath('/html/body/div[2]/div/div[1]/h3/text()')).strip(" ['']")
+ result2 = str(re.sub('\D{2}2-\d+','',result)).replace(' ','',1)
+ #print(result2)
+ return result2
+def getActor(htmlcode):
+ try:
+ html = etree.fromstring(htmlcode, etree.HTMLParser())
+ result = str(html.xpath('/html/body/div[2]/div/div[1]/h5[5]/a/text()')).strip(" ['']")
+ return result
+ except:
+ return ''
+def getStudio(htmlcode):  # fetch the studio
+ html = etree.fromstring(htmlcode,etree.HTMLParser())
+ result = str(html.xpath('/html/body/div[2]/div/div[1]/h5[3]/a[1]/text()')).strip(" ['']")
+ return result
+def getNum(htmlcode):  # fetch the product number
+ html = etree.fromstring(htmlcode, etree.HTMLParser())
+ result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[1]/span[2]/text()')).strip(" ['']")
+ #print(result)
+ return result
+def getRelease(htmlcode2):
+ #a=ADC_function.get_html('http://adult.contents.fc2.com/article_search.php?id='+str(number).lstrip("FC2-").lstrip("fc2-").lstrip("fc2_").lstrip("fc2-")+'&utm_source=aff_php&utm_medium=source_code&utm_campaign=from_aff_php')
+ html=etree.fromstring(htmlcode2,etree.HTMLParser())
+ result = str(html.xpath('//*[@id="container"]/div[1]/div/article/section[1]/div/div[2]/dl/dd[4]/text()')).strip(" ['']")
+ return result
+def getCover(htmlcode, number, htmlcode2):  # fetch the cover URL
+ #a = ADC_function.get_html('http://adult.contents.fc2.com/article_search.php?id=' + str(number).lstrip("FC2-").lstrip("fc2-").lstrip("fc2_").lstrip("fc2-") + '&utm_source=aff_php&utm_medium=source_code&utm_campaign=from_aff_php')
+ html = etree.fromstring(htmlcode2, etree.HTMLParser())
+ result = str(html.xpath('//*[@id="container"]/div[1]/div/article/section[1]/div/div[1]/a/img/@src')).strip(" ['']")
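+    # prefer the cover from the fc2.com article page; fall back to the fc2club
+    # slider image below when the article page yields nothing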
+ if result == '':
+ html = etree.fromstring(htmlcode, etree.HTMLParser())
+ result2 = str(html.xpath('//*[@id="slider"]/ul[1]/li[1]/img/@src')).strip(" ['']")
+ return 'https://fc2club.com' + result2
+ return 'http:' + result
+def getOutline(htmlcode2):  # fetch the outline
+ html = etree.fromstring(htmlcode2, etree.HTMLParser())
+ result = str(html.xpath('/html/body/div[1]/div[2]/div[2]/div[1]/div/article/section[4]/p/text()')).strip(" ['']").replace("\\n",'',10000).replace("'",'',10000).replace(', ,','').strip(' ').replace('。,',',')
+ return result
+def getTag(htmlcode):  # fetch the tags
+ html = etree.fromstring(htmlcode, etree.HTMLParser())
+ result = str(html.xpath('/html/body/div[2]/div/div[1]/h5[4]/a/text()'))
+ return result.strip(" ['']").replace("'",'').replace(' ','')
+def getYear(release):
+ try:
+ result = re.search('\d{4}',release).group()
+ return result
+ except:
+ return ''
+
+def getTitle_fc2com(htmlcode):  # fetch the title (fc2.com layout)
+ html = etree.fromstring(htmlcode,etree.HTMLParser())
+ result = html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/h3/text()')[0]
+ return result
+def getActor_fc2com(htmlcode):
+ try:
+ html = etree.fromstring(htmlcode, etree.HTMLParser())
+ result = html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/ul/li[3]/a/text()')[0]
+ return result
+ except:
+ return ''
+def getStudio_fc2com(htmlcode):  # fetch the studio (fc2.com layout)
+ try:
+ html = etree.fromstring(htmlcode, etree.HTMLParser())
+ result = str(html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/ul/li[3]/a/text()')).strip(" ['']")
+ return result
+ except:
+ return ''
+def getNum_fc2com(htmlcode):  # fetch the product number (fc2.com layout)
+ html = etree.fromstring(htmlcode, etree.HTMLParser())
+ result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[1]/span[2]/text()')).strip(" ['']")
+ return result
+def getRelease_fc2com(htmlcode2):
+ html=etree.fromstring(htmlcode2,etree.HTMLParser())
+ result = str(html.xpath('//*[@id="container"]/div[1]/div/article/section[1]/div/div[2]/dl/dd[4]/text()')).strip(" ['']")
+ return result
+def getCover_fc2com(htmlcode2):  # fetch the cover URL (fc2.com layout)
+ html = etree.fromstring(htmlcode2, etree.HTMLParser())
+ result = str(html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[1]/span/img/@src')).strip(" ['']")
+ return 'http:' + result
+def getOutline_fc2com(htmlcode2):  # fetch the outline (fc2.com layout)
+ html = etree.fromstring(htmlcode2, etree.HTMLParser())
+ result = str(html.xpath('/html/body/div/text()')).strip(" ['']").replace("\\n",'',10000).replace("'",'',10000).replace(', ,','').strip(' ').replace('。,',',')
+ return result
+def getTag_fc2com(number):  # fetch the tags via the fc2 API
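+    # the fc2 tag API returns JSON with escaped unicode; instead of json.loads,
+    # decode the escapes and regex out every "tag" value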
+ htmlcode = str(bytes(ADC_function.get_html('http://adult.contents.fc2.com/api/v4/article/'+number+'/tag?'),'utf-8').decode('unicode-escape'))
+ result = re.findall('"tag":"(.*?)"', htmlcode)
+ return result
+def getYear_fc2com(release):
+ try:
+ result = re.search('\d{4}',release).group()
+ return result
+ except:
+ return ''
+
+def main(number):
+ try:
+ htmlcode2 = ADC_function.get_html('https://adult.contents.fc2.com/article/'+number+'/')
+ htmlcode = ADC_function.get_html('https://fc2club.com//html/FC2-' + number + '.html')
+ actor = getActor(htmlcode)
+ if getActor(htmlcode) == '':
+ actor = 'FC2系列'
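+            # 'FC2系列' ("FC2 series") is a generic placeholder used when no performer is listed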
+ dic = {
+ 'title': getTitle(htmlcode),
+ 'studio': getStudio(htmlcode),
+ 'year': '',#str(re.search('\d{4}',getRelease(number)).group()),
+ 'outline': '',#getOutline(htmlcode2),
+            'runtime': getYear(getRelease(htmlcode2)),  # note: getRelease parses the fc2.com page, so pass htmlcode2 (was htmlcode); the stored value is actually the release year
+ 'director': getStudio(htmlcode),
+ 'actor': actor,
+            'release': getRelease(htmlcode2),  # was getRelease(number): parsing the bare number as HTML always yielded ''
+ 'number': 'FC2-'+number,
+ 'label': '',
+ 'cover': getCover(htmlcode,number,htmlcode2),
+ 'imagecut': 0,
+ 'tag': getTag(htmlcode),
+ 'actor_photo':'',
+ 'website': 'https://fc2club.com//html/FC2-' + number + '.html',
+ 'source':'https://fc2club.com//html/FC2-' + number + '.html',
+ }
+ if dic['title'] == '':
+ htmlcode2 = ADC_function.get_html('https://adult.contents.fc2.com/article/' + number + '/',cookies={'wei6H':'1'})
+ actor = getActor(htmlcode)
+ if getActor(htmlcode) == '':
+ actor = 'FC2系列'
+ dic = {
+ 'title': getTitle_fc2com(htmlcode2),
+ 'studio': getStudio_fc2com(htmlcode2),
+ 'year': '', # str(re.search('\d{4}',getRelease(number)).group()),
+ 'outline': getOutline_fc2com(htmlcode2),
+                'runtime': getYear_fc2com(getRelease_fc2com(htmlcode2)),  # same quirk as above: stores the release year
+ 'director': getStudio_fc2com(htmlcode2),
+ 'actor': actor,
+                'release': getRelease_fc2com(htmlcode2),  # was passed the bare number; the parser expects the fc2.com page HTML
+ 'number': 'FC2-' + number,
+ 'cover': getCover_fc2com(htmlcode2),
+ 'imagecut': 0,
+ 'tag': getTag_fc2com(number),
+ 'label': '',
+ 'actor_photo': '',
+ 'website': 'http://adult.contents.fc2.com/article/' + number + '/',
+ 'source': 'http://adult.contents.fc2.com/article/' + number + '/',
+ }
+ except Exception as e:
+ # (TODO) better handle this
+ # print(e)
+ dic = {"title": ""}
+ js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'),)#.encode('UTF-8')
+ return js
+
+
+#print(main('1252953'))
diff --git a/javbus.py b/SiteSource/javbus.py
similarity index 95%
rename from javbus.py
rename to SiteSource/javbus.py
index aa18d2a..ea06ac4 100755
--- a/javbus.py
+++ b/SiteSource/javbus.py
@@ -1,138 +1,139 @@
-import re
-from pyquery import PyQuery as pq#need install
-from lxml import etree#need install
-from bs4 import BeautifulSoup#need install
-import json
-from ADC_function import *
-
-def getActorPhoto(htmlcode): #//*[@id="star_qdt"]/li/a/img
- soup = BeautifulSoup(htmlcode, 'lxml')
- a = soup.find_all(attrs={'class': 'star-name'})
- d={}
- for i in a:
- l=i.a['href']
- t=i.get_text()
- html = etree.fromstring(get_html(l), etree.HTMLParser())
- p=str(html.xpath('//*[@id="waterfall"]/div[1]/div/div[1]/img/@src')).strip(" ['']")
- p2={t:p}
- d.update(p2)
- return d
-def getTitle(htmlcode): #获取标题
- doc = pq(htmlcode)
- title=str(doc('div.container h3').text()).replace(' ','-')
- try:
- title2 = re.sub('n\d+-','',title)
- return title2
- except:
- return title
-def getStudio(htmlcode): #获取厂商
- html = etree.fromstring(htmlcode,etree.HTMLParser())
- result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[5]/a/text()')).strip(" ['']")
- return result
-def getYear(htmlcode): #获取年份
- html = etree.fromstring(htmlcode,etree.HTMLParser())
- result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[2]/text()')).strip(" ['']")
- return result
-def getCover(htmlcode): #获取封面链接
- doc = pq(htmlcode)
- image = doc('a.bigImage')
- return image.attr('href')
-def getRelease(htmlcode): #获取出版日期
- html = etree.fromstring(htmlcode, etree.HTMLParser())
- result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[2]/text()')).strip(" ['']")
- return result
-def getRuntime(htmlcode): #获取分钟
- soup = BeautifulSoup(htmlcode, 'lxml')
- a = soup.find(text=re.compile('分鐘'))
- return a
-def getActor(htmlcode): #获取女优
- b=[]
- soup=BeautifulSoup(htmlcode,'lxml')
- a=soup.find_all(attrs={'class':'star-name'})
- for i in a:
- b.append(i.get_text())
- return b
-def getNum(htmlcode): #获取番号
- html = etree.fromstring(htmlcode, etree.HTMLParser())
- result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[1]/span[2]/text()')).strip(" ['']")
- return result
-def getDirector(htmlcode): #获取导演
- html = etree.fromstring(htmlcode, etree.HTMLParser())
- result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/a/text()')).strip(" ['']")
- return result
-def getOutline(htmlcode): #获取演员
- doc = pq(htmlcode)
- result = str(doc('tr td div.mg-b20.lh4 p.mg-b20').text())
- return result
-def getSerise(htmlcode):
- html = etree.fromstring(htmlcode, etree.HTMLParser())
- result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[7]/a/text()')).strip(" ['']")
- return result
-def getTag(htmlcode): # 获取演员
- tag = []
- soup = BeautifulSoup(htmlcode, 'lxml')
- a = soup.find_all(attrs={'class': 'genre'})
- for i in a:
- if 'onmouseout' in str(i):
- continue
- tag.append(i.get_text())
- return tag
-
-
-def main(number):
- try:
- htmlcode = get_html('https://www.javbus.com/' + number)
- try:
- dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
- except:
- dww_htmlcode = ''
- dic = {
- 'title': str(re.sub('\w+-\d+-', '', getTitle(htmlcode))),
- 'studio': getStudio(htmlcode),
- 'year': str(re.search('\d{4}', getYear(htmlcode)).group()),
- 'outline': getOutline(dww_htmlcode),
- 'runtime': getRuntime(htmlcode),
- 'director': getDirector(htmlcode),
- 'actor': getActor(htmlcode),
- 'release': getRelease(htmlcode),
- 'number': getNum(htmlcode),
- 'cover': getCover(htmlcode),
- 'imagecut': 1,
- 'tag': getTag(htmlcode),
- 'label': getSerise(htmlcode),
- 'actor_photo': getActorPhoto(htmlcode),
- 'website': 'https://www.javbus.com/' + number,
- 'source' : 'javbus.py',
- }
- js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
- return js
- except:
- return main_uncensored(number)
-
-def main_uncensored(number):
- htmlcode = get_html('https://www.javbus.com/' + number)
- dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
- if getTitle(htmlcode) == '':
- htmlcode = get_html('https://www.javbus.com/' + number.replace('-','_'))
- dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
- dic = {
- 'title': str(re.sub('\w+-\d+-','',getTitle(htmlcode))).replace(getNum(htmlcode)+'-',''),
- 'studio': getStudio(htmlcode),
- 'year': getYear(htmlcode),
- 'outline': getOutline(dww_htmlcode),
- 'runtime': getRuntime(htmlcode),
- 'director': getDirector(htmlcode),
- 'actor': getActor(htmlcode),
- 'release': getRelease(htmlcode),
- 'number': getNum(htmlcode),
- 'cover': getCover(htmlcode),
- 'tag': getTag(htmlcode),
- 'label': getSerise(htmlcode),
- 'imagecut': 0,
- 'actor_photo': '',
- 'website': 'https://www.javbus.com/' + number,
- 'source': 'javbus.py',
- }
- js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
- return js
-
+import re
+from pyquery import PyQuery as pq  # third-party: pip install pyquery
+from lxml import etree  # third-party: pip install lxml
+from bs4 import BeautifulSoup  # third-party: pip install beautifulsoup4
+import json
+from ADC_function import *
+
+def getActorPhoto(htmlcode): #//*[@id="star_qdt"]/li/a/img
+ soup = BeautifulSoup(htmlcode, 'lxml')
+ a = soup.find_all(attrs={'class': 'star-name'})
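+    # each star-name block links to the performer's page; fetch it to read the
+    # portrait URL, building a {name: photo_url} mapping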
+ d={}
+ for i in a:
+ l=i.a['href']
+ t=i.get_text()
+ html = etree.fromstring(get_html(l), etree.HTMLParser())
+ p=str(html.xpath('//*[@id="waterfall"]/div[1]/div/div[1]/img/@src')).strip(" ['']")
+ p2={t:p}
+ d.update(p2)
+ return d
+def getTitle(htmlcode):  # fetch the title
+ doc = pq(htmlcode)
+ title=str(doc('div.container h3').text()).replace(' ','-')
+ try:
+ title2 = re.sub('n\d+-','',title)
+ return title2
+ except:
+ return title
+def getStudio(htmlcode):  # fetch the studio
+ html = etree.fromstring(htmlcode,etree.HTMLParser())
+ result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[5]/a/text()')).strip(" ['']")
+ return result
+def getYear(htmlcode):  # fetch the year
+ html = etree.fromstring(htmlcode,etree.HTMLParser())
+ result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[2]/text()')).strip(" ['']")
+ return result
+def getCover(htmlcode):  # fetch the cover image URL
+ doc = pq(htmlcode)
+ image = doc('a.bigImage')
+ return image.attr('href')
+def getRelease(htmlcode):  # fetch the release date
+ html = etree.fromstring(htmlcode, etree.HTMLParser())
+ result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[2]/text()')).strip(" ['']")
+ return result
+def getRuntime(htmlcode):  # fetch the runtime in minutes
+ soup = BeautifulSoup(htmlcode, 'lxml')
+ a = soup.find(text=re.compile('分鐘'))
+ return a
+def getActor(htmlcode):  # fetch the actresses
+ b=[]
+ soup=BeautifulSoup(htmlcode,'lxml')
+ a=soup.find_all(attrs={'class':'star-name'})
+ for i in a:
+ b.append(i.get_text())
+ return b
+def getNum(htmlcode):  # fetch the product number
+ html = etree.fromstring(htmlcode, etree.HTMLParser())
+ result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[1]/span[2]/text()')).strip(" ['']")
+ return result
+def getDirector(htmlcode):  # fetch the director
+ html = etree.fromstring(htmlcode, etree.HTMLParser())
+ result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/a/text()')).strip(" ['']")
+ return result
+def getOutline(htmlcode):  # fetch the outline (from the dmm page)
+ doc = pq(htmlcode)
+ result = str(doc('tr td div.mg-b20.lh4 p.mg-b20').text())
+ return result
+def getSerise(htmlcode):
+ html = etree.fromstring(htmlcode, etree.HTMLParser())
+ result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[7]/a/text()')).strip(" ['']")
+ return result
+def getTag(htmlcode):  # fetch the genre tags
+ tag = []
+ soup = BeautifulSoup(htmlcode, 'lxml')
+ a = soup.find_all(attrs={'class': 'genre'})
+ for i in a:
+ if 'onmouseout' in str(i):
+ continue
+ tag.append(i.get_text())
+ return tag
+
+
+def main(number):
+ try:
+ htmlcode = get_html('https://www.javbus.com/' + number)
+ try:
+ dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
+ except:
+ dww_htmlcode = ''
+ dic = {
+ 'title': str(re.sub('\w+-\d+-', '', getTitle(htmlcode))),
+ 'studio': getStudio(htmlcode),
+ 'year': str(re.search('\d{4}', getYear(htmlcode)).group()),
+ 'outline': getOutline(dww_htmlcode),
+ 'runtime': getRuntime(htmlcode),
+ 'director': getDirector(htmlcode),
+ 'actor': getActor(htmlcode),
+ 'release': getRelease(htmlcode),
+ 'number': getNum(htmlcode),
+ 'cover': getCover(htmlcode),
+ 'imagecut': 1,
+ 'tag': getTag(htmlcode),
+ 'label': getSerise(htmlcode),
+ 'actor_photo': getActorPhoto(htmlcode),
+ 'website': 'https://www.javbus.com/' + number,
+ 'source' : 'javbus.py',
+ }
+ js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
+ return js
+ except:
+ return main_uncensored(number)
+
+
+def main_uncensored(number):  # uncensored titles
+ htmlcode = get_html('https://www.javbus.com/' + number)
+ dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
+ if getTitle(htmlcode) == '':
+ htmlcode = get_html('https://www.javbus.com/' + number.replace('-','_'))
+ dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
+ dic = {
+ 'title': str(re.sub('\w+-\d+-', '', getTitle(htmlcode))).replace(getNum(htmlcode)+'-', ''),
+ 'studio': getStudio(htmlcode),
+ 'year': getYear(htmlcode),
+ 'outline': getOutline(dww_htmlcode),
+ 'runtime': getRuntime(htmlcode),
+ 'director': getDirector(htmlcode),
+ 'actor': getActor(htmlcode),
+ 'release': getRelease(htmlcode),
+ 'number': getNum(htmlcode),
+ 'cover': getCover(htmlcode),
+ 'tag': getTag(htmlcode),
+ 'label': getSerise(htmlcode),
+ 'imagecut': 0,
+ 'actor_photo': '',
+ 'website': 'https://www.javbus.com/' + number,
+ 'source': 'javbus.py',
+ }
+ js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
+ return js
+
diff --git a/javdb.py b/SiteSource/javdb.py
similarity index 98%
rename from javdb.py
rename to SiteSource/javdb.py
index 727c992..180602a 100755
--- a/javdb.py
+++ b/SiteSource/javdb.py
@@ -1,123 +1,123 @@
-import re
-from lxml import etree
-import json
-from bs4 import BeautifulSoup
-from ADC_function import *
-# import sys
-# import io
-# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
-
-def getTitle(a):
- html = etree.fromstring(a, etree.HTMLParser())
- result = html.xpath("/html/body/section/div/h2/strong/text()")[0]
- return result
-def getActor(a): # //*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
- html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- result1 = str(html.xpath('//strong[contains(text(),"演員")]/../following-sibling::span/text()')).strip(" ['']")
- result2 = str(html.xpath('//strong[contains(text(),"演員")]/../following-sibling::span/a/text()')).strip(" ['']")
- return str(result1 + result2).strip('+').replace(",\\xa0", "").replace("'", "").replace(' ', '').replace(',,', '').lstrip(',').replace(',', ', ')
-def getActorPhoto(actor): #//*[@id="star_qdt"]/li/a/img
- a = actor.split(',')
- d={}
- for i in a:
- p={i:''}
- d.update(p)
- return d
-def getStudio(a):
- html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- result1 = str(html.xpath('//strong[contains(text(),"片商")]/../following-sibling::span/text()')).strip(" ['']")
- result2 = str(html.xpath('//strong[contains(text(),"片商")]/../following-sibling::span/a/text()')).strip(" ['']")
- return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
-def getRuntime(a):
- html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- result1 = str(html.xpath('//strong[contains(text(),"時長")]/../following-sibling::span/text()')).strip(" ['']")
- result2 = str(html.xpath('//strong[contains(text(),"時長")]/../following-sibling::span/a/text()')).strip(" ['']")
- return str(result1 + result2).strip('+').rstrip('mi')
-def getLabel(a):
- html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- result1 = str(html.xpath('//strong[contains(text(),"系列")]/../following-sibling::span/text()')).strip(" ['']")
- result2 = str(html.xpath('//strong[contains(text(),"系列")]/../following-sibling::span/a/text()')).strip(" ['']")
- return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
-def getNum(a):
- html = etree.fromstring(a, etree.HTMLParser())
- result1 = str(html.xpath('//strong[contains(text(),"番號")]/../following-sibling::span/text()')).strip(" ['']")
- result2 = str(html.xpath('//strong[contains(text(),"番號")]/../following-sibling::span/a/text()')).strip(" ['']")
- return str(result2 + result1).strip('+')
-def getYear(getRelease):
- try:
- result = str(re.search('\d{4}', getRelease).group())
- return result
- except:
- return getRelease
-def getRelease(a):
- html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- result1 = str(html.xpath('//strong[contains(text(),"時間")]/../following-sibling::span/text()')).strip(" ['']")
- result2 = str(html.xpath('//strong[contains(text(),"時間")]/../following-sibling::span/a/text()')).strip(" ['']")
- return str(result1 + result2).strip('+')
-def getTag(a):
- html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- result1 = str(html.xpath('//strong[contains(text(),"类别")]/../following-sibling::span/text()')).strip(" ['']")
- result2 = str(html.xpath('//strong[contains(text(),"类别")]/../following-sibling::span/a/text()')).strip(" ['']")
- return str(result1 + result2).strip('+').replace(",\\xa0", "").replace("'", "").replace(' ', '').replace(',,', '').lstrip(',')
-def getCover_small(a, index=0):
- # same issue mentioned below,
- # javdb sometime returns multiple results
- # DO NOT just get the firt one, get the one with correct index number
- html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- result = html.xpath("//div[@class='item-image fix-scale-cover']/img/@src")[index]
- if not 'https' in result:
- result = 'https:' + result
- return result
-def getCover(htmlcode):
- html = etree.fromstring(htmlcode, etree.HTMLParser())
- result = str(html.xpath("//div[@class='column column-video-cover']/a/img/@src")).strip(" ['']")
- return result
-def getDirector(a):
- html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- result1 = str(html.xpath('//strong[contains(text(),"導演")]/../following-sibling::span/text()')).strip(" ['']")
- result2 = str(html.xpath('//strong[contains(text(),"導演")]/../following-sibling::span/a/text()')).strip(" ['']")
- return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
-def getOutline(htmlcode):
- html = etree.fromstring(htmlcode, etree.HTMLParser())
- result = str(html.xpath('//*[@id="introduction"]/dd/p[1]/text()')).strip(" ['']")
- return result
-def main(number):
- try:
- number = number.upper()
- query_result = get_html('https://javdb.com/search?q=' + number + '&f=all')
- html = etree.fromstring(query_result, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- # javdb sometime returns multiple results,
- # and the first elememt maybe not the one we are looking for
- # iterate all candidates and find the match one
- urls = html.xpath('//*[@id="videos"]/div/div/a/@href')
- ids =html.xpath('//*[@id="videos"]/div/div/a/div[contains(@class, "uid")]/text()')
- correct_url = urls[ids.index(number)]
- detail_page = get_html('https://javdb.com' + correct_url)
- dic = {
- 'actor': getActor(detail_page),
- 'title': getTitle(detail_page),
- 'studio': getStudio(detail_page),
- 'outline': getOutline(detail_page),
- 'runtime': getRuntime(detail_page),
- 'director': getDirector(detail_page),
- 'release': getRelease(detail_page),
- 'number': getNum(detail_page),
- 'cover': getCover(detail_page),
- 'cover_small': getCover_small(query_result, index=ids.index(number)),
- 'imagecut': 3,
- 'tag': getTag(detail_page),
- 'label': getLabel(detail_page),
- 'year': getYear(getRelease(detail_page)), # str(re.search('\d{4}',getRelease(a)).group()),
- 'actor_photo': getActorPhoto(getActor(detail_page)),
- 'website': 'https://javdb.com' + correct_url,
- 'source': 'javdb.py',
- }
- except Exception as e:
- # print(e)
- dic = {"title": ""}
- js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
- return js
-
-# main('DV-1562')
-# input("[+][+]Press enter key exit, you can check the error messge before you exit.\n[+][+]按回车键结束,你可以在结束之前查看和错误信息。")
-#print(main('ipx-292'))
+import re
+from lxml import etree
+import json
+from bs4 import BeautifulSoup
+from ADC_function import *
+# import sys
+# import io
+# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
+
+def getTitle(a):
+ html = etree.fromstring(a, etree.HTMLParser())
+ result = html.xpath("/html/body/section/div/h2/strong/text()")[0]
+ return result
+def getActor(a): # //*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
+ html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+ result1 = str(html.xpath('//strong[contains(text(),"演員")]/../following-sibling::span/text()')).strip(" ['']")
+ result2 = str(html.xpath('//strong[contains(text(),"演員")]/../following-sibling::span/a/text()')).strip(" ['']")
+ return str(result1 + result2).strip('+').replace(",\\xa0", "").replace("'", "").replace(' ', '').replace(',,', '').lstrip(',').replace(',', ', ')
+def getActorPhoto(actor): #//*[@id="star_qdt"]/li/a/img
+ a = actor.split(',')
+ d={}
+ for i in a:
+ p={i:''}
+ d.update(p)
+ return d
+def getStudio(a):
+ html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+ result1 = str(html.xpath('//strong[contains(text(),"片商")]/../following-sibling::span/text()')).strip(" ['']")
+ result2 = str(html.xpath('//strong[contains(text(),"片商")]/../following-sibling::span/a/text()')).strip(" ['']")
+ return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
+def getRuntime(a):
+ html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+ result1 = str(html.xpath('//strong[contains(text(),"時長")]/../following-sibling::span/text()')).strip(" ['']")
+ result2 = str(html.xpath('//strong[contains(text(),"時長")]/../following-sibling::span/a/text()')).strip(" ['']")
+ return str(result1 + result2).strip('+').rstrip('mi')
+def getLabel(a):
+ html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+ result1 = str(html.xpath('//strong[contains(text(),"系列")]/../following-sibling::span/text()')).strip(" ['']")
+ result2 = str(html.xpath('//strong[contains(text(),"系列")]/../following-sibling::span/a/text()')).strip(" ['']")
+ return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
+def getNum(a):
+ html = etree.fromstring(a, etree.HTMLParser())
+ result1 = str(html.xpath('//strong[contains(text(),"番號")]/../following-sibling::span/text()')).strip(" ['']")
+ result2 = str(html.xpath('//strong[contains(text(),"番號")]/../following-sibling::span/a/text()')).strip(" ['']")
+ return str(result2 + result1).strip('+')
+def getYear(getRelease):
+ try:
+ result = str(re.search('\d{4}', getRelease).group())
+ return result
+ except:
+ return getRelease
+def getRelease(a):
+ html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+ result1 = str(html.xpath('//strong[contains(text(),"時間")]/../following-sibling::span/text()')).strip(" ['']")
+ result2 = str(html.xpath('//strong[contains(text(),"時間")]/../following-sibling::span/a/text()')).strip(" ['']")
+ return str(result1 + result2).strip('+')
+def getTag(a):
+ html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+ result1 = str(html.xpath('//strong[contains(text(),"类别")]/../following-sibling::span/text()')).strip(" ['']")
+ result2 = str(html.xpath('//strong[contains(text(),"类别")]/../following-sibling::span/a/text()')).strip(" ['']")
+ return str(result1 + result2).strip('+').replace(",\\xa0", "").replace("'", "").replace(' ', '').replace(',,', '').lstrip(',')
+def getCover_small(a, index=0):
+    # same issue as mentioned in main() below:
+    # javdb sometimes returns multiple results;
+    # do NOT just take the first one, take the one at the correct index
+ html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+ result = html.xpath("//div[@class='item-image fix-scale-cover']/img/@src")[index]
+    if 'https' not in result:
+ result = 'https:' + result
+ return result
+def getCover(htmlcode):
+ html = etree.fromstring(htmlcode, etree.HTMLParser())
+ result = str(html.xpath("//div[@class='column column-video-cover']/a/img/@src")).strip(" ['']")
+ return result
+def getDirector(a):
+ html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+ result1 = str(html.xpath('//strong[contains(text(),"導演")]/../following-sibling::span/text()')).strip(" ['']")
+ result2 = str(html.xpath('//strong[contains(text(),"導演")]/../following-sibling::span/a/text()')).strip(" ['']")
+ return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
+def getOutline(htmlcode):
+ html = etree.fromstring(htmlcode, etree.HTMLParser())
+ result = str(html.xpath('//*[@id="introduction"]/dd/p[1]/text()')).strip(" ['']")
+ return result
+def main(number):
+ try:
+ number = number.upper()
+ query_result = get_html('https://javdb.com/search?q=' + number + '&f=all')
+ html = etree.fromstring(query_result, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+        # javdb sometimes returns multiple results,
+        # and the first element may not be the one we are looking for;
+        # iterate over all candidates and find the matching one
+ urls = html.xpath('//*[@id="videos"]/div/div/a/@href')
+        ids = html.xpath('//*[@id="videos"]/div/div/a/div[contains(@class, "uid")]/text()')
+ correct_url = urls[ids.index(number)]
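+        # ids.index() raises ValueError when the number is absent, which the
+        # outer except below turns into an empty-title record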
+ detail_page = get_html('https://javdb.com' + correct_url)
+ dic = {
+ 'actor': getActor(detail_page),
+ 'title': getTitle(detail_page),
+ 'studio': getStudio(detail_page),
+ 'outline': getOutline(detail_page),
+ 'runtime': getRuntime(detail_page),
+ 'director': getDirector(detail_page),
+ 'release': getRelease(detail_page),
+ 'number': getNum(detail_page),
+ 'cover': getCover(detail_page),
+ 'cover_small': getCover_small(query_result, index=ids.index(number)),
+ 'imagecut': 3,
+ 'tag': getTag(detail_page),
+ 'label': getLabel(detail_page),
+ 'year': getYear(getRelease(detail_page)), # str(re.search('\d{4}',getRelease(a)).group()),
+ 'actor_photo': getActorPhoto(getActor(detail_page)),
+ 'website': 'https://javdb.com' + correct_url,
+ 'source': 'javdb.py',
+ }
+ except Exception as e:
+ # print(e)
+ dic = {"title": ""}
+ js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
+ return js
+
+# main('DV-1562')
+# input("[+][+]Press Enter to exit; you can review the error message before exiting.")
+#print(main('ipx-292'))
diff --git a/mgstage.py b/SiteSource/mgstage.py
similarity index 98%
rename from mgstage.py
rename to SiteSource/mgstage.py
index 8e358c9..d1a8e95 100755
--- a/mgstage.py
+++ b/SiteSource/mgstage.py
@@ -1,108 +1,108 @@
-import re
-from lxml import etree
-import json
-from bs4 import BeautifulSoup
-from ADC_function import *
-# import sys
-# import io
-# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
-
-def getTitle(a):
- try:
- html = etree.fromstring(a, etree.HTMLParser())
- result = str(html.xpath('//*[@id="center_column"]/div[1]/h1/text()')).strip(" ['']")
- return result.replace('/', ',')
- except:
- return ''
-def getActor(a): #//*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
- html = etree.fromstring(a, etree.HTMLParser()) #//table/tr[1]/td[1]/text()
- result1=str(html.xpath('//th[contains(text(),"出演:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
- result2=str(html.xpath('//th[contains(text(),"出演:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
- return str(result1+result2).strip('+').replace("', '",'').replace('"','').replace('/',',')
-def getStudio(a):
- html = etree.fromstring(a, etree.HTMLParser()) #//table/tr[1]/td[1]/text()
- result1=str(html.xpath('//th[contains(text(),"シリーズ:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
- result2=str(html.xpath('//th[contains(text(),"シリーズ:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
- return str(result1+result2).strip('+').replace("', '",'').replace('"','')
-def getRuntime(a):
- html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- result1 = str(html.xpath('//th[contains(text(),"収録時間:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
- result2 = str(html.xpath('//th[contains(text(),"収録時間:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
- return str(result1 + result2).strip('+').rstrip('mi')
-def getLabel(a):
- html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- result1 = str(html.xpath('//th[contains(text(),"シリーズ:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip(
- '\\n')
- result2 = str(html.xpath('//th[contains(text(),"シリーズ:")]/../td/text()')).strip(" ['']").strip('\\n ').strip(
- '\\n')
- return str(result1 + result2).strip('+').replace("', '",'').replace('"','')
-def getNum(a):
- html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- result1 = str(html.xpath('//th[contains(text(),"品番:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip(
- '\\n')
- result2 = str(html.xpath('//th[contains(text(),"品番:")]/../td/text()')).strip(" ['']").strip('\\n ').strip(
- '\\n')
- return str(result1 + result2).strip('+')
-def getYear(getRelease):
- try:
- result = str(re.search('\d{4}',getRelease).group())
- return result
- except:
- return getRelease
-def getRelease(a):
- html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- result1 = str(html.xpath('//th[contains(text(),"配信開始日:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip(
- '\\n')
- result2 = str(html.xpath('//th[contains(text(),"配信開始日:")]/../td/text()')).strip(" ['']").strip('\\n ').strip(
- '\\n')
- return str(result1 + result2).strip('+')
-def getTag(a):
- html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- result1 = str(html.xpath('//th[contains(text(),"ジャンル:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip(
- '\\n')
- result2 = str(html.xpath('//th[contains(text(),"ジャンル:")]/../td/text()')).strip(" ['']").strip('\\n ').strip(
- '\\n')
- return str(result1 + result2).strip('+').replace("', '\\n",",").replace("', '","").replace('"','')
-def getCover(htmlcode):
- html = etree.fromstring(htmlcode, etree.HTMLParser())
- result = str(html.xpath('//*[@id="center_column"]/div[1]/div[1]/div/div/h2/img/@src')).strip(" ['']")
- # /html/body/div[2]/article[2]/div[1]/div[1]/div/div/h2/img/@src
- return result
-def getDirector(a):
- html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
- result1 = str(html.xpath('//th[contains(text(),"シリーズ")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip(
- '\\n')
- result2 = str(html.xpath('//th[contains(text(),"シリーズ")]/../td/text()')).strip(" ['']").strip('\\n ').strip(
- '\\n')
- return str(result1 + result2).strip('+').replace("', '",'').replace('"','')
-def getOutline(htmlcode):
- html = etree.fromstring(htmlcode, etree.HTMLParser())
- result = str(html.xpath('//*[@id="introduction"]/dd/p[1]/text()')).strip(" ['']")
- return result
-def main(number2):
- number=number2.upper()
- htmlcode=str(get_html('https://www.mgstage.com/product/product_detail/'+str(number)+'/',cookies={'adc':'1'}))
- soup = BeautifulSoup(htmlcode, 'lxml')
- a = str(soup.find(attrs={'class': 'detail_data'})).replace('\n ','').replace(' ','').replace('\n ','').replace('\n ','')
- dic = {
- 'title': getTitle(htmlcode).replace("\\n",'').replace(' ',''),
- 'studio': getStudio(a),
- 'outline': getOutline(htmlcode),
- 'runtime': getRuntime(a),
- 'director': getDirector(a),
- 'actor': getActor(a),
- 'release': getRelease(a),
- 'number': getNum(a),
- 'cover': getCover(htmlcode),
- 'imagecut': 0,
- 'tag': getTag(a),
- 'label':getLabel(a),
- 'year': getYear(getRelease(a)), # str(re.search('\d{4}',getRelease(a)).group()),
- 'actor_photo': '',
- 'website':'https://www.mgstage.com/product/product_detail/'+str(number)+'/',
- 'source': 'mgstage.py',
- }
- js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
- return js
-
-#print(main('SIRO-3607'))
+import re
+from lxml import etree
+import json
+from bs4 import BeautifulSoup
+from ADC_function import *
+# import sys
+# import io
+# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
+
+def getTitle(a):
+ try:
+ html = etree.fromstring(a, etree.HTMLParser())
+ result = str(html.xpath('//*[@id="center_column"]/div[1]/h1/text()')).strip(" ['']")
+ return result.replace('/', ',')
+ except:
+ return ''
+def getActor(a): #//*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
+ html = etree.fromstring(a, etree.HTMLParser()) #//table/tr[1]/td[1]/text()
+ result1=str(html.xpath('//th[contains(text(),"出演:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
+ result2=str(html.xpath('//th[contains(text(),"出演:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
+ return str(result1+result2).strip('+').replace("', '",'').replace('"','').replace('/',',')
+def getStudio(a):
+ html = etree.fromstring(a, etree.HTMLParser()) #//table/tr[1]/td[1]/text()
+ result1=str(html.xpath('//th[contains(text(),"シリーズ:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
+ result2=str(html.xpath('//th[contains(text(),"シリーズ:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
+ return str(result1+result2).strip('+').replace("', '",'').replace('"','')
+def getRuntime(a):
+ html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+ result1 = str(html.xpath('//th[contains(text(),"収録時間:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
+ result2 = str(html.xpath('//th[contains(text(),"収録時間:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
+ return str(result1 + result2).strip('+').rstrip('mi')
+def getLabel(a):
+ html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+ result1 = str(html.xpath('//th[contains(text(),"シリーズ:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip(
+ '\\n')
+ result2 = str(html.xpath('//th[contains(text(),"シリーズ:")]/../td/text()')).strip(" ['']").strip('\\n ').strip(
+ '\\n')
+ return str(result1 + result2).strip('+').replace("', '",'').replace('"','')
+def getNum(a):
+ html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+ result1 = str(html.xpath('//th[contains(text(),"品番:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip(
+ '\\n')
+ result2 = str(html.xpath('//th[contains(text(),"品番:")]/../td/text()')).strip(" ['']").strip('\\n ').strip(
+ '\\n')
+ return str(result1 + result2).strip('+')
+def getYear(getRelease):
+ try:
+ result = str(re.search('\d{4}',getRelease).group())
+ return result
+ except:
+ return getRelease
+def getRelease(a):
+ html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+ result1 = str(html.xpath('//th[contains(text(),"配信開始日:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip(
+ '\\n')
+ result2 = str(html.xpath('//th[contains(text(),"配信開始日:")]/../td/text()')).strip(" ['']").strip('\\n ').strip(
+ '\\n')
+ return str(result1 + result2).strip('+')
+def getTag(a):
+ html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+ result1 = str(html.xpath('//th[contains(text(),"ジャンル:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip(
+ '\\n')
+ result2 = str(html.xpath('//th[contains(text(),"ジャンル:")]/../td/text()')).strip(" ['']").strip('\\n ').strip(
+ '\\n')
+ return str(result1 + result2).strip('+').replace("', '\\n",",").replace("', '","").replace('"','')
+def getCover(htmlcode):
+ html = etree.fromstring(htmlcode, etree.HTMLParser())
+ result = str(html.xpath('//*[@id="center_column"]/div[1]/div[1]/div/div/h2/img/@src')).strip(" ['']")
+ # /html/body/div[2]/article[2]/div[1]/div[1]/div/div/h2/img/@src
+ return result
+def getDirector(a):
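+    # note: like getStudio above, these xpaths read the シリーズ (series) row,
+    # apparently copy-pasted; the page's director field is not actually parsed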
+ html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
+ result1 = str(html.xpath('//th[contains(text(),"シリーズ")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip(
+ '\\n')
+ result2 = str(html.xpath('//th[contains(text(),"シリーズ")]/../td/text()')).strip(" ['']").strip('\\n ').strip(
+ '\\n')
+ return str(result1 + result2).strip('+').replace("', '",'').replace('"','')
+def getOutline(htmlcode):
+ html = etree.fromstring(htmlcode, etree.HTMLParser())
+ result = str(html.xpath('//*[@id="introduction"]/dd/p[1]/text()')).strip(" ['']")
+ return result
+def main(number2):
+ number=number2.upper()
+ htmlcode=str(get_html('https://www.mgstage.com/product/product_detail/'+str(number)+'/',cookies={'adc':'1'}))
+ soup = BeautifulSoup(htmlcode, 'lxml')
+ a = str(soup.find(attrs={'class': 'detail_data'})).replace('\n ','').replace(' ','').replace('\n ','').replace('\n ','')
+ dic = {
+ 'title': getTitle(htmlcode).replace("\\n",'').replace(' ',''),
+ 'studio': getStudio(a),
+ 'outline': getOutline(htmlcode),
+ 'runtime': getRuntime(a),
+ 'director': getDirector(a),
+ 'actor': getActor(a),
+ 'release': getRelease(a),
+ 'number': getNum(a),
+ 'cover': getCover(htmlcode),
+ 'imagecut': 0,
+ 'tag': getTag(a),
+ 'label':getLabel(a),
+ 'year': getYear(getRelease(a)), # str(re.search('\d{4}',getRelease(a)).group()),
+ 'actor_photo': '',
+ 'website':'https://www.mgstage.com/product/product_detail/'+str(number)+'/',
+ 'source': 'mgstage.py',
+ }
+ js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
+ return js
+
+#print(main('SIRO-3607'))
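+# A minimal smoke test, assuming network access and that mgstage accepts the
+# adc=1 age-check cookie used above:
+#   data = json.loads(main('SIRO-3607'))
+#   print(data['number'], data['title'], data['release'])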
diff --git a/TestPathNFO.txt b/TestPathNFO.txt
new file mode 100644
index 0000000..399647c
--- /dev/null
+++ b/TestPathNFO.txt
@@ -0,0 +1,41 @@
+/Volumes/Adult/Files/ノ瀬アメリ/Tokyo Hot N0646.avi
+/Volumes/Adult/Files/ノ瀬アメリ/MKBD_S03-MaRieS.mp4
+/Volumes/192.168.2.100/Adult/Files/Aki Sasaki Megapack/HODV-21299.mkv
+/Volumes/Adult/Files/[Tokyo-Hot] [n1180] 美人秘書3穴串刺奉仕残業 (中井綾香 Ayaka Nakai)/(Tokyo-Hot)(n1180)美人秘書3穴串刺奉仕残業 中井綾香.mp4
+/mcdv47.avi
+/mcdv-47.avi
+/mcdv-047.mp4
+/mcdv047.mp4
+/mcdv0047.mp4
+/1pondo-070409_621.mp4
+/Volumes/Adult/Files/Kirara Asuka (@明日花キララ) FHD Pack Vol#1(181222)@RUNBKK/No-Watermarked/HOBD00015.FHD2.wmv
+/Volumes/Adult/Files/(1.18TB) Julia movie pack collection Part 1/720p/RBD-406_1.mp4
+/Volumes/Adult/Files/(1.18TB) Julia movie pack collection Part 1/720p/MDYD-664B.mp4
+/Volumes/Adult/Files/107NTTR-037A.mp4
+/Volumes/Adult/Files/Yua.Mikami-PML/SNIS-986 国民的アイドル アドレナリン大爆発!禁欲1ヶ月後の性欲剥き出し焦らされトランスFUCK 三上悠亜【桃花族】.mp4
+/Volumes/Adult/Files/(1.18TB) Julia movie pack collection Part 2/FHD/UPSM-109_2.mkv
+/Volumes/Adult/Files/Kirara Asuka (@明日花キララ) FHD Pack Vol#2(181231)@RUNBKK/No-Watermarked/PPT003.SD3.wmv
+/Volumes/Adult/Files/波多野结衣/THE波多野結衣 ぶっかけ50連発! CD1.wmv
+/Volumes/Adult/Files/波多野结衣/欲しがり 後編 波多野結衣.wmv
+/Volumes/Adult/Files/波多野结衣/欲しがり 前編 波多野結衣.wmv
+/Volumes/Adult/Files/波多野结衣/加勒比 062212-055 夫の目の前で妻が ~元上司に縛られて~波多野結衣~.rmvb
+/Volumes/Adult/Files/波多野结衣/022213-271-carib-whole_s.mp4
+/Volumes/Adult/Files/SKYHD-001~010/SKYHD-009_H265.mkv
+/Volumes/Adult/Files/大桥步兵合集/LAFBD-41.LaForet.Girl.41.angel.and.devil.Miku.Ohashi.2015.Bluray.1080p.x264.ac3-MTeam.mkv
+/Volumes/Adult/Files/大桥步兵合集/032015_161-caribpr-high.mp4
+/Volumes/Adult/Files/桃谷绘里香(桃谷エリカ) 所有作品集合/118ppt00016hhb2.mkv
+/Volumes/Adult/Files/tia/soe935C.HD.wmv
+/Volumes/Adult/Files/SKYHD-011~020/SKYHD-020_H265.mkv
+/Volumes/Adult/Files/RION(りおん).Utsunomiya.Shion.宇都宮しをん(うつのみやしをん)/VR/sivr00008_E.mp4
+/Volumes/Adult/Files/RION(りおん).Utsunomiya.Shion.宇都宮しをん(うつのみやしをん)/DMM.Video/onsd00899hhb3.mp4
+/Volumes/Adult/Files/Rating Top 30 JAV pack/SHKD-744 営業課長の湿ったパンスト 里美ゆりあ.mp4
+/Volumes/Adult/Files/Rating Top 30 JAV pack/ABP-627 裏・鈴村あいり-鈴村あいりのオトナの激情SEX4本番 鈴村あいり.MP4
+/Volumes/Adult/Files/Rating Top 30 JAV pack/20 ABP-408 上原瑞穂/上原瑞穂 ABP-408 无码流出片段/[ThZu.Cc]20150909164411.m2ts
+/Volumes/Adult/Files/Caribbean-101717-520-HD/100917-515/100917-515-carib-1080p.mp4
+/Volumes/Adult/Files/ノ瀬アメリ/20081105栗栖エリカ - Sky Angel Blue 10 天舞超絕美少女天使降臨(skyhd010)(中文字幕).avi
+/Volumes/Adult/Files/ノ瀬アメリ/一ノ瀬アメリ~加勒比 VERY SEXY.wmv
+/Volumes/Adult/Files/ノ瀬アメリ/20101202一ノ瀬アメリ - 東京ブルドック05(inu006).avi
+/Volumes/Adult/Files/ノ瀬アメリ/Sky Angel Vol 80 - CD2.mp4
+/Volumes/Adult/Files/Mika Sumire すみれ美香/Caribbean-091818-755.mp4
+/Volumes/Adult/Files/Takizawa Rola/[HD]abp-031C.wmv
+/Volumes/Adult/Files/Takizawa Rola/ABP-013HDA.wmv
\ No newline at end of file
diff --git a/TestPathSpecial.txt b/TestPathSpecial.txt
new file mode 100644
index 0000000..cc22544
--- /dev/null
+++ b/TestPathSpecial.txt
@@ -0,0 +1,51 @@
+/Volumes/192.168.2.100/Adult/Files/Aki Sasaki Megapack/HODV-21222.mkv
+/Volumes/Adult/Files/ノ瀬アメリ/Tokyo Hot N0646.avi
+/Volumes/Adult/Files/ノ瀬アメリ/MKBD_S03-MaRieS.mp4
+/Volumes/192.168.2.100/Adult/Files/RION(りおん).Utsunomiya.Shion.宇都宮しをん(うつのみやしをん)/DMM.Video/onsd00899hhb3.mp4
+/Volumes/192.168.2.100/Adult/Files/Rating Top 30 JAV pack/IPTD-999-1 彼女の姉貴とイケナイ関係 Rio.wmv
+/Volumes/192.168.2.100/Adult/Files/Rating Top 30 JAV pack/IPTD-999A 彼女の姉貴とイケナイ関係 Rio.wmv
+/Volumes/192.168.2.100/Adult/Files/Rating Top 30 JAV pack/IPTD-999-A 彼女の姉貴とイケナイ関係 Rio.wmv
+/Volumes/192.168.2.100/Adult/Files/Rating Top 30 JAV pack/IPTD-999-C 彼女の姉貴とイケナイ関係 Rio.wmv
+/Volumes/192.168.2.100/Adult/Files/Rating Top 30 JAV pack/IPTD-999-B 彼女の姉貴とイケナイ関係 Rio.wmv
+/Volumes/192.168.2.100/Adult/Files/tia/soe935C.HD.wmv
+/Volumes/192.168.2.100/Adult/Files/tia/soe935B.HD.wmv
+/Volumes/192.168.2.100/Adult/Files/tia/soe935A.HD.wmv
+/Volumes/192.168.2.100/Adult/Files/tia/soe935D.HD.wmv
+/Volumes/Adult/Files/大桥步兵合集/LAFBD-41.LaForet.Girl.41.angel.and.devil.Miku.Ohashi.2015.Bluray.1080p.x264.ac3-MTeam.mkv
+/Volumes/Adult/Files/[Tokyo-Hot] [n1180] 美人秘書3穴串刺奉仕残業 (中井綾香 Ayaka Nakai)/(Tokyo-Hot)(n1180)美人秘書3穴串刺奉仕残業 中井綾香.mp4
+/mcdv47.avi
+/mcdv-47.avi
+/mcdv-047.mp4
+/mcdv047.mp4
+/mcdv0047.mp4
+/1pondo-070409_621.mp4
+/Volumes/Adult/Files/Kirara Asuka (@明日花キララ) FHD Pack Vol#1(181222)@RUNBKK/No-Watermarked/HOBD00015.FHD2.wmv
+/Volumes/Adult/Files/(1.18TB) Julia movie pack collection Part 1/720p/RBD-406_1.mp4
+/Volumes/Adult/Files/(1.18TB) Julia movie pack collection Part 1/720p/MDYD-664B.mp4
+/Volumes/Adult/Files/107NTTR-037A.mp4
+/Volumes/Adult/Files/Yua.Mikami-PML/SNIS-986 国民的アイドル アドレナリン大爆発!禁欲1ヶ月後の性欲剥き出し焦らされトランスFUCK 三上悠亜【桃花族】.mp4
+/Volumes/Adult/Files/(1.18TB) Julia movie pack collection Part 2/FHD/UPSM-109_2.mkv
+/Volumes/Adult/Files/Kirara Asuka (@明日花キララ) FHD Pack Vol#2(181231)@RUNBKK/No-Watermarked/PPT003.SD3.wmv
+/Volumes/Adult/Files/波多野结衣/THE波多野結衣 ぶっかけ50連発! CD1.wmv
+/Volumes/Adult/Files/波多野结衣/欲しがり 後編 波多野結衣.wmv
+/Volumes/Adult/Files/波多野结衣/欲しがり 前編 波多野結衣.wmv
+/Volumes/Adult/Files/波多野结衣/加勒比 062212-055 夫の目の前で妻が ~元上司に縛られて~波多野結衣~.rmvb
+/Volumes/Adult/Files/波多野结衣/022213-271-carib-whole_s.mp4
+/Volumes/Adult/Files/SKYHD-001~010/SKYHD-009_H265.mkv
+/Volumes/Adult/Files/大桥步兵合集/LAFBD-41.LaForet.Girl.41.angel.and.devil.Miku.Ohashi.2015.Bluray.1080p.x264.ac3-MTeam.mkv
+/Volumes/Adult/Files/大桥步兵合集/032015_161-caribpr-high.mp4
+/Volumes/Adult/Files/桃谷绘里香(桃谷エリカ) 所有作品集合/118ppt00016hhb2.mkv
+/Volumes/Adult/Files/SKYHD-011~020/SKYHD-020_H265.mkv
+/Volumes/Adult/Files/RION(りおん).Utsunomiya.Shion.宇都宮しをん(うつのみやしをん)/VR/sivr00008_E.mp4
+/Volumes/Adult/Files/RION(りおん).Utsunomiya.Shion.宇都宮しをん(うつのみやしをん)/DMM.Video/onsd00899hhb3.mp4
+/Volumes/Adult/Files/Rating Top 30 JAV pack/SHKD-744 営業課長の湿ったパンスト 里美ゆりあ.mp4
+/Volumes/Adult/Files/Rating Top 30 JAV pack/ABP-627 裏・鈴村あいり-鈴村あいりのオトナの激情SEX4本番 鈴村あいり.MP4
+/Volumes/Adult/Files/Rating Top 30 JAV pack/20 ABP-408 上原瑞穂/上原瑞穂 ABP-408 无码流出片段/[ThZu.Cc]20150909164411.m2ts
+/Volumes/Adult/Files/Caribbean-101717-520-HD/100917-515/100917-515-carib-1080p.mp4
+/Volumes/Adult/Files/ノ瀬アメリ/20081105栗栖エリカ - Sky Angel Blue 10 天舞超絕美少女天使降臨(skyhd010)(中文字幕).avi
+/Volumes/Adult/Files/ノ瀬アメリ/一ノ瀬アメリ~加勒比 VERY SEXY.wmv
+/Volumes/Adult/Files/ノ瀬アメリ/20101202一ノ瀬アメリ - 東京ブルドック05(inu006).avi
+/Volumes/Adult/Files/ノ瀬アメリ/Sky Angel Vol 80 - CD2.mp4
+/Volumes/Adult/Files/Mika Sumire すみれ美香/Caribbean-091818-755.mp4
+/Volumes/Adult/Files/Takizawa Rola/[HD]abp-031C.wmv
+/Volumes/Adult/Files/Takizawa Rola/ABP-013HDA.wmv
\ No newline at end of file
diff --git a/TestPaths.txt b/TestPaths.txt
new file mode 100644
index 0000000..93ecbe0
--- /dev/null
+++ b/TestPaths.txt
@@ -0,0 +1,50 @@
+/Volumes/Adult/Files/Kirara Asuka (@明日花キララ) FHD Pack Vol#1(181222)@RUNBKK/No-Watermarked/HOBD00015.FHD2.wmv
+/1pondo-070409_621.mp4
+/Volumes/Adult/Files/107NTTR-037.mp4
+/Volumes/Adult/Files/107NTTR-037A.mp4
+/Volumes/Adult/Files/Yua.Mikami-PML/TEK-097 ふたりは無敵.wmv
+/Volumes/Adult/Files/Yua.Mikami-PML/SNIS-986 国民的アイドル アドレナリン大爆発!禁欲1ヶ月後の性欲剥き出し焦らされトランスFUCK 三上悠亜【桃花族】.mp4
+/Volumes/Adult/Files/Yua.Mikami-PML/SSNI-030 三上悠亜ファン感謝祭 国民的アイドル×一般ユーザー20人‘ガチファンとSEX解禁’ハメまくりスペシャル【桃花族】.mp4
+/Volumes/Adult/Files/(1.18TB) Julia movie pack collection Part 2/FHD/MIDD-893A.mkv
+/Volumes/Adult/Files/(1.18TB) Julia movie pack collection Part 2/FHD/UPSM-109_2.mkv
+/Volumes/Adult/Files/Kirara Asuka (@明日花キララ) FHD Pack Vol#2(181231)@RUNBKK/No-Watermarked/PPT003.SD3.wmv
+/Volumes/Adult/Files/波多野结衣/THE波多野結衣 ぶっかけ50連発! CD1.wmv
+/Volumes/Adult/Files/波多野结衣/欲しがり 後編 波多野結衣.wmv
+/Volumes/Adult/Files/波多野结衣/欲しがり 前編 波多野結衣.wmv
+/Volumes/Adult/Files/波多野结衣/加勒比 062212-055 夫の目の前で妻が ~元上司に縛られて~波多野結衣~.rmvb
+/Volumes/Adult/Files/波多野结衣/022213-271-carib-whole_s.mp4
+/Volumes/Adult/Files/桜木凛 Rin Sakuragi FHD Collection Pack Vol/BBI-183.wmv
+/Volumes/Adult/Files/NOP-019 芭蕾教室 水嶋あずみ/NOP019B.HD.wmv
+/Volumes/Adult/Files/一ノ瀬アメリ part2/栗栖エリカ/20081105栗栖エリカ - Sky Angel Blue 10 天舞超絕美少女天使降臨(skyhd010)(中文字幕).avi
+/Volumes/Adult/Files/一ノ瀬アメリ part2/Max Girls/Max Girls 24(xv804)伊東遥,Rio,小沢アリス,葉月しおり,一ノ瀬アメリ,ひなた結衣,藤崎りお.avi
+/Volumes/Adult/Files/一ノ瀬アメリ part2/ノ瀬アメリAmeri Ichinose/20091127一ノ瀬アメリ - 一見面就做愛(xv801).avi
+/Volumes/Adult/Files/Aki Sasaki Megapack/MSTG-003.mkv
+/Volumes/Adult/Files/SKYHD-001~010/SKYHD-009_H265.mkv
+/Volumes/Adult/Files/大桥步兵合集/LAFBD-41.LaForet.Girl.41.angel.and.devil.Miku.Ohashi.2015.Bluray.1080p.x264.ac3-MTeam.mkv
+/Volumes/Adult/Files/大桥步兵合集/032015_161-caribpr-high.mp4
+/Volumes/Adult/Files/桃谷绘里香(桃谷エリカ) 所有作品集合/(PRESTIGE)(ABP-171)彼女のお姉さんは、誘惑ヤリたがり娘。桃谷エリカ.wmv
+/Volumes/Adult/Files/桃谷绘里香(桃谷エリカ) 所有作品集合/(PRESTIGE)(ABP-145)濃密な接吻と欲情ベロキス性交 04 桃谷エリカ.wmv
+/Volumes/Adult/Files/桃谷绘里香(桃谷エリカ) 所有作品集合/118ppt00016hhb2.mkv
+/Volumes/Adult/Files/tia/soe935C.HD.wmv
+/Volumes/Adult/Files/SKYHD-011~020/SKYHD-020_H265.mkv
+/Volumes/Adult/Files/sakumomo1203-PML/IDBD-795 ももに夢中 2018年日本人にもっとも愛された女優桜空ももPREMIUM BOX8時間BEST.mp4
+/Volumes/Adult/Files/sakumomo1203-PML/IDBD-768 Gカップグラビアアイドル桜空もも初ベスト 原石 2【桃花族】.mp4
+/Volumes/Adult/Files/RION(りおん).Utsunomiya.Shion.宇都宮しをん(うつのみやしをん)/VR/sivr00008_E.mp4
+/Volumes/Adult/Files/RION(りおん).Utsunomiya.Shion.宇都宮しをん(うつのみやしをん)/DMM.Video/onsd00899hhb3.mp4
+/Volumes/Adult/Files/Rating Top 30 JAV pack/SHKD-744 営業課長の湿ったパンスト 里美ゆりあ.mp4
+/Volumes/Adult/Files/Rating Top 30 JAV pack/ABP-627 裏・鈴村あいり-鈴村あいりのオトナの激情SEX4本番 鈴村あいり.MP4
+/Volumes/Adult/Files/Rating Top 30 JAV pack/20 ABP-408 上原瑞穂/上原瑞穂 ABP-408 无码流出片段/[ThZu.Cc]20150909164411.m2ts
+/Volumes/Adult/Files/Caribbean-101717-520-HD/100917-515/100917-515-carib-1080p.mp4
+/Volumes/Adult/Files/Kirara Asuka (@明日花キララ) FHD Pack Vol#3(190119)@RUNBKK/No-Watermarked/SOE976.FHD3.wmv
+/Volumes/Adult/Files/(1.18TB) Julia movie pack collection Part 1/720p/RBD-406_1.mp4
+/Volumes/Adult/Files/(1.18TB) Julia movie pack collection Part 1/720p/MDYD-664B.mp4
+/Volumes/Adult/Files/ノ瀬アメリ/20081105栗栖エリカ - Sky Angel Blue 10 天舞超絕美少女天使降臨(skyhd010)(中文字幕).avi
+/Volumes/Adult/Files/ノ瀬アメリ/一ノ瀬アメリ~加勒比 VERY SEXY.wmv
+/Volumes/Adult/Files/ノ瀬アメリ/20101202一ノ瀬アメリ - 東京ブルドック05(inu006).avi
+/Volumes/Adult/Files/ノ瀬アメリ/Sky Angel Vol 80 - CD2.mp4
+/Volumes/Adult/Files/ノ瀬アメリ/20100226一ノ瀬アメリ - OL Style 制服(xv827).avi
+/Volumes/Adult/Files/Mika Sumire すみれ美香/Caribbean-091818-755.mp4
+/Volumes/Adult/Files/[Tokyo-Hot] [n1180] 美人秘書3穴串刺奉仕残業 (中井綾香 Ayaka Nakai)/(Tokyo-Hot)(n1180)美人秘書3穴串刺奉仕残業 中井綾香.mp4
+/Volumes/Adult/Files/Takizawa Rola/[HD]abp-031C.wmv
+/Volumes/Adult/Files/Takizawa Rola/ABP-013HDA.wmv
+/Volumes/Adult/Files/Uncensored Mosaic Removal Megapack/ADN-017(Asami Ogawa).mp4
\ No newline at end of file
diff --git a/config.ini b/config.ini
old mode 100644
new mode 100755
index f2ac60d..a017bab
--- a/config.ini
+++ b/config.ini
@@ -1,28 +1,35 @@
[common]
-main_mode=1
-failed_output_folder=failed
-success_output_folder=JAV_output
+main_mode=2
+# All paths are absolute; do not include characters such as " or '
+search_folder= /Volumes/192.168.2.100/Adult/AVTest
+# If failed_output_folder is empty, videos whose metadata cannot be scraped will not be moved
+failed_output_folder= /Volumes/192.168.2.100/Adult/UnknownStars
+success_output_folder= /Volumes/192.168.2.100/Adult/Files
+# Folder for temporary assets such as xxx.nfo files and poster images
+temp_folder= /Volumes/192.168.2.100/Adult/temp
+# For remotely mounted volumes it is best to leave soft links disabled: a soft link stores an absolute path, and paths on a remote NAS usually differ from the locally mounted ones.
soft_link=0
[proxy]
-proxy=127.0.0.1:1080
-timeout=10
-retry=3
+# The example below is a SOCKS proxy; the value after = may be left empty to disable the proxy
+proxy= socks5h://127.0.0.1:1081
+timeout= 10
+retry= 5
[Name_Rule]
-location_rule=actor+'/'+number
-naming_rule=number+'-'+title
+location_rule= actor+'/'+number
+naming_rule= number+'-'+title
[update]
update_check=1
[media]
-media_warehouse=emby
#emby or plex or kodi ,emby=jellyfin
+media_warehouse=EMBY
[escape]
literals=\()
-folders=failed,JAV_output
+folders=/Volumes/Adult/UnknownStars,/Volumes/Adult/Stars
[debug_mode]
-switch=1
\ No newline at end of file
+switch=1
diff --git a/core.py b/core.py
index 5b47d6b..23e1237 100755
--- a/core.py
+++ b/core.py
@@ -1,691 +1,918 @@
-# -*- coding: utf-8 -*-
-
-import re
-import os
-import os.path
-import shutil
-from PIL import Image
-import time
-import json
-from ADC_function import *
-from configparser import ConfigParser
-import argparse
-# =========website========
-import fc2fans_club
-import mgstage
-import avsox
-import javbus
-import javdb
-import fanza
-import jav321
-import requests
-
-
-# =====================本地文件处理===========================
-
-def escapePath(path, Config): # Remove escape literals
- escapeLiterals = Config['escape']['literals']
- backslash = '\\'
- for literal in escapeLiterals:
- path = path.replace(backslash + literal, '')
- return path
-
-
-def moveFailedFolder(filepath, failed_folder):
- print('[-]Move to Failed output folder')
- shutil.move(filepath, str(os.getcwd()) + '/' + failed_folder + '/')
- return
-
-
-def CreatFailedFolder(failed_folder):
- if not os.path.exists(failed_folder + '/'): # 新建failed文件夹
- try:
- os.makedirs(failed_folder + '/')
- except:
- print("[-]failed!can not be make Failed output folder\n[-](Please run as Administrator)")
- return
-
-
-def getDataFromJSON(file_number, filepath, failed_folder): # 从JSON返回元数据
- """
- iterate through all services and fetch the data
- """
-
- func_mapping = {
- "avsox": avsox.main,
- "fc2": fc2fans_club.main,
- "fanza": fanza.main,
- "javdb": javdb.main,
- "javbus": javbus.main,
- "mgstage": mgstage.main,
- "jav321": jav321.main,
- }
-
- # default fetch order list, from the begining to the end
- sources = ["javbus", "javdb", "fanza", "mgstage", "fc2", "avsox", "jav321"]
-
- # if the input file name matches centain rules,
- # move some web service to the begining of the list
- if re.match(r"^\d{5,}", file_number) or (
- "HEYZO" in file_number or "heyzo" in file_number or "Heyzo" in file_number
- ):
- sources.insert(0, sources.pop(sources.index("avsox")))
- elif re.match(r"\d+\D+", file_number) or (
- "siro" in file_number or "SIRO" in file_number or "Siro" in file_number
- ):
- sources.insert(0, sources.pop(sources.index("mgstage")))
- elif "fc2" in file_number or "FC2" in file_number:
- sources.insert(0, sources.pop(sources.index("fc2")))
-
- for source in sources:
- json_data = json.loads(func_mapping[source](file_number))
- # if any service return a valid return, break
- if getDataState(json_data) != 0:
- break
-
- # ================================================网站规则添加结束================================================
-
- title = json_data['title']
- actor_list = str(json_data['actor']).strip("[ ]").replace("'", '').split(',') # 字符串转列表
- release = json_data['release']
- number = json_data['number']
- studio = json_data['studio']
- source = json_data['source']
- runtime = json_data['runtime']
- outline = json_data['runtime']
- label = json_data['label']
- year = json_data['year']
- try:
- cover_small = json_data['cover_small']
- except:
- cover_small = ''
- imagecut = json_data['imagecut']
- tag = str(json_data['tag']).strip("[ ]").replace("'", '').replace(" ", '').split(',') # 字符串转列表 @
- actor = str(actor_list).strip("[ ]").replace("'", '').replace(" ", '')
-
-
- if title == '' or number == '':
- print('[-]Movie Data not found!')
- moveFailedFolder(filepath, failed_folder)
- return
-
- # if imagecut == '3':
- # DownloadFileWithFilename()
-
- # ====================处理异常字符====================== #\/:*?"<>|
- title = title.replace('\\', '')
- title = title.replace('/', '')
- title = title.replace(':', '')
- title = title.replace('*', '')
- title = title.replace('?', '')
- title = title.replace('"', '')
- title = title.replace('<', '')
- title = title.replace('>', '')
- title = title.replace('|', '')
- release = release.replace('/', '-')
- tmpArr = cover_small.split(',')
- if len(tmpArr) > 0:
- cover_small = tmpArr[0].strip('\"').strip('\'')
- # ====================处理异常字符 END================== #\/:*?"<>|
-
- naming_rule = eval(config['Name_Rule']['naming_rule'])
- location_rule = eval(config['Name_Rule']['location_rule'])
-
- # 返回处理后的json_data
- json_data['title'] = title
- json_data['actor'] = actor
- json_data['release'] = release
- json_data['cover_small'] = cover_small
- json_data['tag'] = tag
- json_data['naming_rule'] = naming_rule
- json_data['location_rule'] = location_rule
- json_data['year'] = year
- return json_data
-
-
-def get_info(json_data): # 返回json里的数据
- title = json_data['title']
- studio = json_data['studio']
- year = json_data['year']
- outline = json_data['outline']
- runtime = json_data['runtime']
- director = json_data['director']
- actor_photo = json_data['actor_photo']
- release = json_data['release']
- number = json_data['number']
- cover = json_data['cover']
- website = json_data['website']
- return title, studio, year, outline, runtime, director, actor_photo, release, number, cover, website
-
-
-def smallCoverCheck(path, number, imagecut, cover_small, c_word, option, Config, filepath, failed_folder):
- if imagecut == 3:
- if option == 'emby':
- DownloadFileWithFilename(cover_small, '1.jpg', path, Config, filepath, failed_folder)
- try:
- img = Image.open(path + '/1.jpg')
- except Exception:
- img = Image.open('1.jpg')
- w = img.width
- h = img.height
- img.save(path + '/' + number + c_word + '.png')
- time.sleep(1)
- os.remove(path + '/1.jpg')
- if option == 'kodi':
- DownloadFileWithFilename(cover_small, '1.jpg', path, Config, filepath, failed_folder)
- try:
- img = Image.open(path + '/1.jpg')
- except Exception:
- img = Image.open('1.jpg')
- w = img.width
- h = img.height
- img.save(path + '/' + number + c_word + '-poster.jpg')
- time.sleep(1)
- os.remove(path + '/1.jpg')
- if option == 'plex':
- DownloadFileWithFilename(cover_small, '1.jpg', path, Config, filepath, failed_folder)
- try:
- img = Image.open(path + '/1.jpg')
- except Exception:
- img = Image.open('1.jpg')
- w = img.width
- h = img.height
- img.save(path + '/poster.jpg')
- os.remove(path + '/1.jpg')
-
-
-def creatFolder(success_folder, location_rule, json_data, Config): # 创建文件夹
- title, studio, year, outline, runtime, director, actor_photo, release, number, cover, website = get_info(json_data)
- if len(location_rule) > 240: # 新建成功输出文件夹
- path = success_folder + '/' + location_rule.replace("'actor'", "'manypeople'", 3).replace("actor",
- "'manypeople'",
- 3) # path为影片+元数据所在目录
- else:
- path = success_folder + '/' + location_rule
- # print(path)
- if not os.path.exists(path):
- path = escapePath(path, Config)
- try:
- os.makedirs(path)
- except:
- path = success_folder + '/' + location_rule.replace('/[' + number + ']-' + title, "/number")
- path = escapePath(path, Config)
- os.makedirs(path)
- return path
-
-
-# =====================资源下载部分===========================
-def DownloadFileWithFilename(url, filename, path, Config, filepath, failed_folder): # path = examle:photo , video.in the Project Folder!
- proxy, timeout, retry_count = get_network_settings()
- i = 0
-
- while i < retry_count:
- try:
- if not proxy == '':
- if not os.path.exists(path):
- os.makedirs(path)
- headers = {
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'}
- r = requests.get(url, headers=headers, timeout=timeout,
- proxies={"http": "http://" + str(proxy), "https": "https://" + str(proxy)})
- if r == '':
- print('[-]Movie Data not found!')
- return
- with open(str(path) + "/" + filename, "wb") as code:
- code.write(r.content)
- return
- else:
- if not os.path.exists(path):
- os.makedirs(path)
- headers = {
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'}
- r = requests.get(url, timeout=timeout, headers=headers)
- if r == '':
- print('[-]Movie Data not found!')
- return
- with open(str(path) + "/" + filename, "wb") as code:
- code.write(r.content)
- return
- except requests.exceptions.RequestException:
- i += 1
- print('[-]Image Download : Connect retry ' + str(i) + '/' + str(retry_count))
- except requests.exceptions.ConnectionError:
- i += 1
- print('[-]Image Download : Connect retry ' + str(i) + '/' + str(retry_count))
- except requests.exceptions.ProxyError:
- i += 1
- print('[-]Image Download : Connect retry ' + str(i) + '/' + str(retry_count))
- except requests.exceptions.ConnectTimeout:
- i += 1
- print('[-]Image Download : Connect retry ' + str(i) + '/' + str(retry_count))
- print('[-]Connect Failed! Please check your Proxy or Network!')
- moveFailedFolder(filepath, failed_folder)
- return
-
-
-def imageDownload(option, cover, number, c_word, path, multi_part, Config, filepath, failed_folder): # 封面是否下载成功,否则移动到failed
- if option == 'emby':
- if DownloadFileWithFilename(cover, number + c_word + '.jpg', path, Config, filepath, failed_folder) == 'failed':
- moveFailedFolder(filepath, failed_folder)
- return
- DownloadFileWithFilename(cover, number + c_word + '.jpg', path, Config, filepath, failed_folder)
- if not os.path.getsize(path + '/' + number + c_word + '.jpg') == 0:
- print('[+]Image Downloaded!', path + '/' + number + c_word + '.jpg')
- return
- i = 1
- while i <= int(config['proxy']['retry']):
- if os.path.getsize(path + '/' + number + c_word + '.jpg') == 0:
- print('[!]Image Download Failed! Trying again. [' + config['proxy']['retry'] + '/3]')
- DownloadFileWithFilename(cover, number + c_word + '.jpg', path, Config, filepath, failed_folder)
- i = i + 1
- continue
- else:
- break
- if multi_part == 1:
- old_name = os.path.join(path, number + c_word + '.jpg')
- new_name = os.path.join(path, number + c_word + '.jpg')
- os.rename(old_name, new_name)
- print('[+]Image Downloaded!', path + '/' + number + c_word + '.jpg')
- else:
- print('[+]Image Downloaded!', path + '/' + number + c_word + '.jpg')
- elif option == 'plex':
- if DownloadFileWithFilename(cover, 'fanart.jpg', path, Config, filepath, failed_folder) == 'failed':
- moveFailedFolder(filepath, failed_folder)
- return
- DownloadFileWithFilename(cover, 'fanart.jpg', path, Config, filepath, failed_folder)
- if not os.path.getsize(path + '/fanart.jpg') == 0:
- print('[+]Image Downloaded!', path + '/fanart.jpg')
- return
- i = 1
- while i <= int(config['proxy']['retry']):
- if os.path.getsize(path + '/fanart.jpg') == 0:
- print('[!]Image Download Failed! Trying again. [' + config['proxy']['retry'] + '/3]')
- DownloadFileWithFilename(cover, 'fanart.jpg', path, Config, filepath, failed_folder)
- i = i + 1
- continue
- else:
- break
- if not os.path.getsize(path + '/' + number + c_word + '.jpg') == 0:
- print('[!]Image Download Failed! Trying again.')
- DownloadFileWithFilename(cover, number + c_word + '.jpg', path, Config, filepath, failed_folder)
- print('[+]Image Downloaded!', path + '/fanart.jpg')
- elif option == 'kodi':
- if DownloadFileWithFilename(cover, number + c_word + '-fanart.jpg', path, Config, filepath, failed_folder) == 'failed':
- moveFailedFolder(filepath, failed_folder)
- return
- DownloadFileWithFilename(cover, number + c_word + '-fanart.jpg', path, Config, filepath, failed_folder)
- if not os.path.getsize(path + '/' + number + c_word + '-fanart.jpg') == 0:
- print('[+]Image Downloaded!', path + '/' + number + c_word + '-fanart.jpg')
- return
- i = 1
- while i <= int(config['proxy']['retry']):
- if os.path.getsize(path + '/' + number + c_word + '-fanart.jpg') == 0:
- print('[!]Image Download Failed! Trying again. [' + config['proxy']['retry'] + '/3]')
- DownloadFileWithFilename(cover, number + c_word + '-fanart.jpg', path, Config, filepath, failed_folder)
- i = i + 1
- continue
- else:
- break
- print('[+]Image Downloaded!', path + '/' + number + c_word + '-fanart.jpg')
-
-
-def PrintFiles(option, path, c_word, naming_rule, part, cn_sub, json_data, filepath, failed_folder, tag):
- title, studio, year, outline, runtime, director, actor_photo, release, number, cover, website = get_info(json_data)
- try:
- if not os.path.exists(path):
- os.makedirs(path)
- if option == 'plex':
- with open(path + "/" + number + c_word + ".nfo", "wt", encoding='UTF-8') as code:
- print('', file=code)
- print("", file=code)
- print(" " + naming_rule + part + "", file=code)
- print(" ", file=code)
- print(" ", file=code)
- print(" " + studio + "+", file=code)
- print(" " + year + "", file=code)
- print(" " + outline + "", file=code)
- print(" " + outline + "", file=code)
- print(" " + str(runtime).replace(" ", "") + "", file=code)
- print(" " + director + "", file=code)
- print(" poster.jpg", file=code)
- print(" thumb.png", file=code)
- print(" fanart.jpg", file=code)
- try:
- for key, value in actor_photo.items():
- print(" ", file=code)
- print(" " + key + "", file=code)
- if not value == '': # or actor_photo == []:
- print(" " + value + "", file=code)
- print(" ", file=code)
- except:
- aaaa = ''
- print(" " + studio + "", file=code)
- print(" ", file=code)
- if cn_sub == '1':
- print(" 中文字幕", file=code)
- try:
- for i in str(json_data['tag']).strip("[ ]").replace("'", '').replace(" ", '').split(','):
- print(" " + i + "", file=code)
- except:
- aaaaa = ''
- try:
- for i in str(json_data['tag']).strip("[ ]").replace("'", '').replace(" ", '').split(','):
- print(" " + i + "", file=code)
- except:
- aaaaaaaa = ''
- if cn_sub == '1':
- print(" 中文字幕", file=code)
- print(" " + number + "", file=code)
- print(" " + release + "", file=code)
- print(" " + cover + "", file=code)
- print(" " + website + "", file=code)
- print("", file=code)
- print("[+]Writeed! " + path + "/" + number + ".nfo")
- elif option == 'emby':
- with open(path + "/" + number + c_word + ".nfo", "wt", encoding='UTF-8') as code:
- print('', file=code)
- print("", file=code)
- print(" " + naming_rule + part + "", file=code)
- print(" ", file=code)
- print(" ", file=code)
- print(" " + studio + "+", file=code)
- print(" " + year + "", file=code)
- print(" " + outline + "", file=code)
- print(" " + outline + "", file=code)
- print(" " + str(runtime).replace(" ", "") + "", file=code)
- print(" " + director + "", file=code)
- print(" " + number + c_word + ".png", file=code)
- print(" " + number + c_word + ".png", file=code)
- print(" " + number + c_word + '.jpg' + "", file=code)
- try:
- for key, value in actor_photo.items():
- print(" ", file=code)
- print(" " + key + "", file=code)
- if not value == '': # or actor_photo == []:
- print(" " + value + "", file=code)
- print(" ", file=code)
- except:
- aaaa = ''
- print(" " + studio + "", file=code)
- print(" ", file=code)
- if cn_sub == '1':
- print(" 中文字幕", file=code)
- try:
- for i in tag:
- print(" " + i + "", file=code)
- except:
- aaaaa = ''
- try:
- for i in tag:
- print(" " + i + "", file=code)
- except:
- aaaaaaaa = ''
- if cn_sub == '1':
- print(" 中文字幕", file=code)
- print(" " + number + "", file=code)
- print(" " + release + "", file=code)
- print(" " + cover + "", file=code)
- print(" " + website + "", file=code)
- print("", file=code)
- print("[+]Writeed! " + path + "/" + number + c_word + ".nfo")
- elif option == 'kodi':
- with open(path + "/" + number + c_word + ".nfo", "wt", encoding='UTF-8') as code:
- print('', file=code)
- print("", file=code)
- print(" " + naming_rule + part + "", file=code)
- print(" ", file=code)
- print(" ", file=code)
- print(" " + studio + "+", file=code)
- print(" " + year + "", file=code)
- print(" " + outline + "", file=code)
- print(" " + outline + "", file=code)
- print(" " + str(runtime).replace(" ", "") + "", file=code)
- print(" " + director + "", file=code)
- print(" " + number + c_word + "-poster.jpg", file=code)
- print(" " + number + c_word + '-fanart.jpg' + "", file=code)
- try:
- for key, value in actor_photo.items():
- print(" ", file=code)
- print(" " + key + "", file=code)
- if not value == '': # or actor_photo == []:
- print(" " + value + "", file=code)
- print(" ", file=code)
- except:
- aaaa = ''
- print(" " + studio + "", file=code)
- print(" ", file=code)
- if cn_sub == '1':
- print(" 中文字幕", file=code)
- try:
- for i in tag:
- print(" " + i + "", file=code)
- except:
- aaaaa = ''
- try:
- for i in tag:
- print(" " + i + "", file=code)
- except:
- aaaaaaaa = ''
- if cn_sub == '1':
- print(" 中文字幕", file=code)
- print(" " + number + "", file=code)
- print(" " + release + "", file=code)
- print(" " + cover + "", file=code)
- print(" " + website + "", file=code)
- print("", file=code)
- print("[+]Writeed! " + path + "/" + number + c_word + ".nfo")
- except IOError as e:
- print("[-]Write Failed!")
- print(e)
- moveFailedFolder(filepath, failed_folder)
- return
- except Exception as e1:
- print(e1)
- print("[-]Write Failed!")
- moveFailedFolder(filepath, failed_folder)
- return
-
-
-def cutImage(option, imagecut, path, number, c_word):
- if option == 'plex':
- if imagecut == 1:
- try:
- img = Image.open(path + '/fanart.jpg')
- imgSize = img.size
- w = img.width
- h = img.height
- img2 = img.crop((w / 1.9, 0, w, h))
- img2.save(path + '/poster.jpg')
- except:
- print('[-]Cover cut failed!')
- elif imagecut == 0:
- img = Image.open(path + '/fanart.jpg')
- w = img.width
- h = img.height
- img.save(path + '/poster.jpg')
- elif option == 'emby':
- if imagecut == 1:
- try:
- img = Image.open(path + '/' + number + c_word + '.jpg')
- imgSize = img.size
- w = img.width
- h = img.height
- img2 = img.crop((w / 1.9, 0, w, h))
- img2.save(path + '/' + number + c_word + '.png')
- except:
- print('[-]Cover cut failed!')
- elif imagecut == 0:
- img = Image.open(path + '/' + number + c_word + '.jpg')
- w = img.width
- h = img.height
- img.save(path + '/' + number + c_word + '.png')
- elif option == 'kodi':
- if imagecut == 1:
- try:
- img = Image.open(path + '/' + number + c_word + '-fanart.jpg')
- imgSize = img.size
- w = img.width
- h = img.height
- img2 = img.crop((w / 1.9, 0, w, h))
- img2.save(path + '/' + number + c_word + '-poster.jpg')
- except:
- print('[-]Cover cut failed!')
- elif imagecut == 0:
- img = Image.open(path + '/' + number + c_word + '-fanart.jpg')
- w = img.width
- h = img.height
- try:
- img = img.convert('RGB')
- img.save(path + '/' + number + c_word + '-poster.jpg')
- except:
- img = img.convert('RGB')
- img.save(path + '/' + number + c_word + '-poster.jpg')
-
-
-def pasteFileToFolder(filepath, path, number, c_word): # 文件路径,番号,后缀,要移动至的位置
- houzhui = str(re.search('[.](AVI|RMVB|WMV|MOV|MP4|MKV|FLV|TS|WEBM|avi|rmvb|wmv|mov|mp4|mkv|flv|ts|webm)$', filepath).group())
- try:
- if config['common']['soft_link'] == '1': # 如果soft_link=1 使用软链接
- os.symlink(filepath, path + '/' + number + c_word + houzhui)
- else:
- os.rename(filepath, path + '/' + number + c_word + houzhui)
- if os.path.exists(os.getcwd() + '/' + number + c_word + '.srt'): # 字幕移动
- os.rename(os.getcwd() + '/' + number + c_word + '.srt', path + '/' + number + c_word + '.srt')
- print('[+]Sub moved!')
- elif os.path.exists(os.getcwd() + '/' + number + c_word + '.ssa'):
- os.rename(os.getcwd() + '/' + number + c_word + '.ssa', path + '/' + number + c_word + '.ssa')
- print('[+]Sub moved!')
- elif os.path.exists(os.getcwd() + '/' + number + c_word + '.sub'):
- os.rename(os.getcwd() + '/' + number + c_word + '.sub', path + '/' + number + c_word + '.sub')
- print('[+]Sub moved!')
- except FileExistsError:
- print('[-]File Exists! Please check your movie!')
- print('[-]move to the root folder of the program.')
- return
- except PermissionError:
- print('[-]Error! Please run as administrator!')
- return
-
-
-def pasteFileToFolder_mode2(filepath, path, multi_part, number, part, c_word): # 文件路径,番号,后缀,要移动至的位置
- if multi_part == 1:
- number += part # 这时number会被附加上CD1后缀
- houzhui = str(re.search('[.](AVI|RMVB|WMV|MOV|MP4|MKV|FLV|TS|WEBM|avi|rmvb|wmv|mov|mp4|mkv|flv|ts|webm)$', filepath).group())
- try:
- if config['common']['soft_link'] == '1':
- os.symlink(filepath, path + '/' + number + part + c_word + houzhui)
- else:
- os.rename(filepath, path + '/' + number + part + c_word + houzhui)
- if os.path.exists(number + '.srt'): # 字幕移动
- os.rename(number + part + c_word + '.srt', path + '/' + number + part + c_word + '.srt')
- print('[+]Sub moved!')
- elif os.path.exists(number + part + c_word + '.ass'):
- os.rename(number + part + c_word + '.ass', path + '/' + number + part + c_word + '.ass')
- print('[+]Sub moved!')
- elif os.path.exists(number + part + c_word + '.sub'):
- os.rename(number + part + c_word + '.sub', path + '/' + number + part + c_word + '.sub')
- print('[+]Sub moved!')
- print('[!]Success')
- except FileExistsError:
- print('[-]File Exists! Please check your movie!')
- print('[-]move to the root folder of the program.')
- return
- except PermissionError:
- print('[-]Error! Please run as administrator!')
- return
-
-
-def copyRenameJpgToBackdrop(option, path, number, c_word):
- if option == 'plex':
- shutil.copy(path + '/fanart.jpg', path + '/Backdrop.jpg')
- shutil.copy(path + '/poster.jpg', path + '/thumb.png')
- if option == 'emby':
- shutil.copy(path + '/' + number + c_word + '.jpg', path + '/Backdrop.jpg')
- if option == 'kodi':
- shutil.copy(path + '/' + number + c_word + '-fanart.jpg', path + '/Backdrop.jpg')
-
-
-def get_part(filepath, failed_folder):
- try:
- if re.search('-CD\d+', filepath):
- return re.findall('-CD\d+', filepath)[0]
- if re.search('-cd\d+', filepath):
- return re.findall('-cd\d+', filepath)[0]
- except:
- print("[-]failed!Please rename the filename again!")
- moveFailedFolder(filepath, failed_folder)
- return
-
-
-def debug_mode(json_data):
- try:
- if config['debug_mode']['switch'] == '1':
- print('[+] ---Debug info---')
- for i, v in json_data.items():
- if i == 'outline':
- print('[+] -', i, ' :', len(v), 'characters')
- continue
- if i == 'actor_photo' or i == 'year':
- continue
- print('[+] -', "%-11s" % i, ':', v)
- print('[+] ---Debug info---')
- except:
- aaa = ''
-
-
-def core_main(file_path, number_th):
- # =======================================================================初始化所需变量
- multi_part = 0
- part = ''
- c_word = ''
- option = ''
- cn_sub = ''
- config_file = 'config.ini'
- Config = ConfigParser()
- Config.read(config_file, encoding='UTF-8')
- try:
- option = ReadMediaWarehouse()
- except:
- print('[-]Config media_warehouse read failed!')
- program_mode = Config['common']['main_mode'] # 运行模式
- failed_folder = Config['common']['failed_output_folder'] # 失败输出目录
- success_folder = Config['common']['success_output_folder'] # 成功输出目录
- filepath = file_path # 影片的路径
- number = number_th
- json_data = getDataFromJSON(number, filepath, failed_folder) # 定义番号
- if json_data["number"] != number:
- # fix issue #119
- # the root cause is we normalize the search id
- # PrintFiles() will use the normalized id from website,
- # but pasteFileToFolder() still use the input raw search id
- # so the solution is: use the normalized search id
- number = json_data["number"]
- imagecut = json_data['imagecut']
- tag = json_data['tag']
- # =======================================================================判断-C,-CD后缀
- if '-CD' in filepath or '-cd' in filepath:
- multi_part = 1
- part = get_part(filepath, failed_folder)
- if '-c.' in filepath or '-C.' in filepath or '中文' in filepath or '字幕' in filepath:
- cn_sub = '1'
- c_word = '-C' # 中文字幕影片后缀
-
- CreatFailedFolder(failed_folder) # 创建输出失败目录
- debug_mode(json_data) # 调试模式检测
- path = creatFolder(success_folder, json_data['location_rule'], json_data, Config) # 创建文件夹
- # =======================================================================刮削模式
- if program_mode == '1':
- if multi_part == 1:
- number += part # 这时number会被附加上CD1后缀
- smallCoverCheck(path, number, imagecut, json_data['cover_small'], c_word, option, Config, filepath, failed_folder) # 检查小封面
- imageDownload(option, json_data['cover'], number, c_word, path, multi_part, Config, filepath, failed_folder) # creatFoder会返回番号路径
- cutImage(option, imagecut, path, number, c_word) # 裁剪图
- copyRenameJpgToBackdrop(option, path, number, c_word)
- PrintFiles(option, path, c_word, json_data['naming_rule'], part, cn_sub, json_data, filepath, failed_folder, tag) # 打印文件
- pasteFileToFolder(filepath, path, number, c_word) # 移动文件
- # =======================================================================整理模式
- elif program_mode == '2':
- pasteFileToFolder_mode2(filepath, path, multi_part, number, part, c_word) # 移动文件
+# -*- coding: utf-8 -*-
+
+import os.path
+import shutil
+from PIL import Image
+import json
+from ADC_function import *
+from MediaServer import *
+from AV_Data_Capture import config
+import lazyxml
+# =========website========
+from SiteSource import avsox, javdb, fc2fans_club, javbus, fanza, mgstage
+import requests
+from enum import Enum, auto
+
+
+# ===================== Local file handling ===========================
+
+def escapePath(path, escapeLiterals): # Remove escape literals
+ # escapeLiterals = Config['escape']['literals']
+ backslash = '\\'
+ for literal in escapeLiterals:
+ path = path.replace(backslash + literal, '')
+ return path
+
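+# e.g. escapePath(r"JAV_output/actor\(2019\)", "()") -> "JAV_output/actor2019":
+# each backslash-escaped literal is removed together with its backslash.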
+
+def moveFailedFolder(filepath, failed_folder):
+ if failed_folder.strip() == '':
+        print('[+]Failed output folder is empty; leaving the file in place')
+ else:
+ print('[-]Move to Failed output folder')
+ shutil.move(filepath, failed_folder)
+ return
+
+
+def CreatFailedFolder(failed_folder):
+    if not os.path.exists(failed_folder + '/'):  # create the failed folder
+ try:
+ os.makedirs(failed_folder + '/')
+ except:
+ print("[-]failed!can not be make Failed output folder\n[-](Please run as Administrator)")
+ return
+
+# Fetch the metadata dict for a given movie ID
+
+
+class SiteSource(Enum):
+ AVSOX = auto()
+ FC2 = auto()
+ FANZA = auto()
+ JAVDB = auto()
+ JAVBUS = auto()
+ MGSTAGE = auto()
+
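+# NOTE: SiteSource is not referenced below yet: getDataFromJSON still keys its
+# func_mapping table and fetch order by plain strings.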
+
+def getDataFromJSON(file_number):  # return metadata assembled from the per-site JSON
+ """
+ iterate through all services and fetch the data
+ """
+
+ func_mapping = {
+ "avsox": avsox.main,
+ "fc2": fc2fans_club.main,
+ "fanza": fanza.main,
+ "javdb": javdb.main,
+ "javbus": javbus.main,
+ "mgstage": mgstage.main,
+ }
+
+    # default fetch order, from the first tried to the last
+ sources = ["javbus", "javdb", "fanza", "mgstage", "fc2", "avsox"]
+
+    # if the input file number matches certain patterns,
+    # move the matching services to the beginning of the list
+ if re.match(r"^\d{5,}", file_number) or re.match(r'heyzo', file_number, re.IGNORECASE):
+ sources.insert(0, sources.pop(sources.index("avsox")))
+ elif re.match(r"\d+\D+", file_number) or re.match(r'siro', file_number, re.IGNORECASE):
+ sources.insert(0, sources.pop(sources.index("mgstage")))
+ sources.insert(0, sources.pop(sources.index("fanza")))
+ elif re.match(r'fc2', file_number, re.IGNORECASE):
+ sources.insert(0, sources.pop(sources.index("fc2")))
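+    # e.g. "070409_621" (leading digits) moves avsox to the front, while
+    # "SIRO-3607" moves mgstage and then fanza forward, so fanza is tried first.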
+
+ for source in sources:
+ json_data = json.loads(func_mapping[source](file_number))
+        # stop at the first service that returns valid data
+ if getDataState(json_data) != 0:
+ break
+
+    # ================================================ end of per-site rules ================================================
+
+ title = json_data['title']
+    actor_list = str(json_data['actor']).strip("[ ]").replace("'", '').split(',')  # string -> list
+ release = json_data['release']
+ number = json_data['number']
+ studio = json_data['studio']
+ source = json_data['source']
+ runtime = json_data['runtime']
+    outline = json_data['outline']
+ label = json_data['label']
+ year = json_data['year']
+    cover_small = json_data.get('cover_small', '')
+
+ imagecut = json_data['imagecut']
+    tag = str(json_data['tag']).strip("[ ]").replace("'", '').replace(" ", '').split(',')  # string -> list
+ actor = str(actor_list).strip("[ ]").replace("'", '').replace(" ", '')
+
+ if title == '' or number == '':
+ raise Exception('[-]Movie Data not found!')
+
+ # if imagecut == '3':
+ # DownloadFileWithFilename()
+
+    # ==================== sanitize illegal characters ====================== #\/:*?"<>|
+ title = re.sub(r'[#\\/:*?"<>|\]]', '', title, 0, re.IGNORECASE)
+ release = release.replace('/', '-')
+ tmpArr = cover_small.split(',')
+ if len(tmpArr) > 0:
+ cover_small = tmpArr[0].strip('\"').strip('\'')
+    # ==================== sanitize illegal characters END ================== #\/:*?"<>|
+
+ naming_rule = eval(config.naming_rule)
+ location_rule = eval(config.location_rule)
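+    # naming_rule and location_rule come from config.ini as Python expressions,
+    # e.g. number+'-'+title and actor+'/'+number; eval() resolves them against
+    # the local variables bound above, so keep config.ini trusted.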
+
+    # return the processed json_data
+ json_data['title'] = title
+ json_data['actor'] = actor
+ json_data['release'] = release
+ json_data['cover_small'] = cover_small
+ json_data['tag'] = tag
+ json_data['naming_rule'] = naming_rule
+ json_data['location_rule'] = location_rule
+ json_data['year'] = year
+ return json_data
+
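+# For reference, the dict returned above carries at least: title, actor, release,
+# number, studio, runtime, outline, label, year, cover, cover_small, imagecut,
+# tag, naming_rule, location_rule, actor_photo, website and source
+# (see mgstage.main above for one producer).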
+
+def get_info(json_data):  # unpack the fields stored in the json dict
+ title = json_data['title']
+ studio = json_data['studio']
+ year = json_data['year']
+ outline = json_data['outline']
+ runtime = json_data['runtime']
+ director = json_data['director']
+ actor_photo = json_data['actor_photo']
+ release = json_data['release']
+ number = json_data['number']
+ cover = json_data['cover']
+ website = json_data['website']
+ return title, studio, year, outline, runtime, director, actor_photo, release, number, cover, website
+
+
+def download_cover_file(url, name, folder_path):
+ """
+ download small cover
+ :param url: url
+ :param name: name same as movie's name without ext
+ :param folder_path: dir to save
+ :return:
+ """
+ filename = config.media_server.poster_name(name)
+ DownloadFileWithFilename(url, filename, folder_path)
+
+
+def smallCoverCheck(path, number, imagecut, cover_small, c_word, option, filepath, failed_folder):
+    if imagecut == 3:
+        if option == 'emby':
+            DownloadFileWithFilename(cover_small, '1.jpg', path)
+            try:
+                img = Image.open(path + '/1.jpg')
+            except Exception:
+                img = Image.open('1.jpg')
+            img.save(path + '/' + number + c_word + '.png')
+            time.sleep(1)
+            os.remove(path + '/1.jpg')
+        if option == 'kodi':
+            DownloadFileWithFilename(cover_small, '1.jpg', path)
+            try:
+                img = Image.open(path + '/1.jpg')
+            except Exception:
+                img = Image.open('1.jpg')
+            img.save(path + '/' + number + c_word + '-poster.jpg')
+            time.sleep(1)
+            os.remove(path + '/1.jpg')
+        if option == 'plex':
+            DownloadFileWithFilename(cover_small, '1.jpg', path)
+            try:
+                img = Image.open(path + '/1.jpg')
+            except Exception:
+                img = Image.open('1.jpg')
+            img.save(path + '/poster.jpg')
+            os.remove(path + '/1.jpg')
+
+
+def creatFolder(success_folder, location_rule, json_data, escapeLiterals):  # create the output folder
+    title, studio, year, outline, runtime, director, actor_photo, release, number, cover, website = get_info(json_data)
+    if len(location_rule) > 240:  # keep over-long paths (e.g. many actors) under the filesystem limit
+        path = success_folder + '/' + location_rule.replace("'actor'", "'manypeople'", 3).replace("actor",
+                                                                                                  "'manypeople'",
+                                                                                                  3)  # path is the folder holding the movie plus its metadata
+ else:
+ path = success_folder + '/' + location_rule
+ # print(path)
+ if not os.path.exists(path):
+ path = escapePath(path, escapeLiterals)
+ try:
+ os.makedirs(path)
+ except:
+ path = success_folder + '/' + location_rule.replace('/[' + number + ']-' + title, "/number")
+ path = escapePath(path, escapeLiterals)
+ os.makedirs(path)
+ return path
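+# With location_rule actor+'/'+number (see config.ini above), this returns e.g.
+# success_folder + '/三上悠亜/SNIS-986', after escapePath drops escaped literals.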
+
+
+# ===================== Download section ===========================
+def download_file(url, folder, name_with_ext):
+ """
+ download file
+ :param url: source url
+ :param name_with_ext: full name like 'mike.jpg'
+ :param folder: folder path
+    :return: full path of the downloaded file, e.g. '/Users/proj/AV_Data_Capture/mike.jpg'
+ """
+ proxy_dict = {"http": str(config.proxy), "https": str(config.proxy)} if config.proxy else None
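+    # config.ini above sets proxy to a socks5h:// URL; requests only understands
+    # that scheme with the PySocks extra installed (pip install requests[socks]).
+    # The same proxy URL is reused for both http and https traffic.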
+    i = 0
+    while i < config.retry:
+        try:
+            if not os.path.exists(folder):
+                os.makedirs(folder)
+            headers = {
+                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'}
+            r = requests.get(url, headers=headers, timeout=config.timeout, proxies=proxy_dict)
+            if not r.ok:  # requests.get never returns '', so test the response status instead
+                print('[-]Movie Data not found!')
+                return
+            with open(str(folder) + "/" + name_with_ext, "wb") as code:
+                code.write(r.content)
+            return str(folder) + "/" + name_with_ext
+        except requests.exceptions.RequestException:  # base class of ConnectionError, ProxyError and ConnectTimeout
+            i += 1
+            print('[-]Image Download : Connect retry ' + str(i) + '/' + str(config.retry))
+
+
+def DownloadFileWithFilename(url, filename, path):  # path example: 'photo' or 'video' inside the project folder
+    proxy, timeout, retry_count = get_network_settings()
+    i = 0
+    proxy_dict = {"http": str(proxy), "https": str(proxy)} if proxy else None
+    while i < retry_count:
+        try:
+            if not os.path.exists(path):
+                os.makedirs(path)
+            headers = {
+                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'}
+            r = requests.get(url, headers=headers, timeout=timeout,
+                             proxies=proxy_dict)
+            if not r.ok:  # test the response status; requests.get never returns ''
+                print('[-]Movie Data not found!')
+                return
+            with open(str(path) + "/" + filename, "wb") as code:
+                code.write(r.content)
+            return
+        except requests.exceptions.RequestException:  # base class of ConnectionError, ProxyError and ConnectTimeout
+            i += 1
+            print('[-]Image Download : Connect retry ' + str(i) + '/' + str(retry_count))
+    print('[-]Connect Failed! Please check your Proxy or Network!')
+    # moveFailedFolder(filepath, failed_folder)
+    return
+
+
+def download_image(url, name, folder):
+ """
+ download img
+ :param url: source
+ :param name: name
+ :param folder: folder to save
+ :return:
+ """
+ name_with_ext = config.media_server.image_name(name)
+ download_file(url, folder, name_with_ext)
+
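+# Typical use, with `path` from creatFolder and the image name chosen by
+# config.media_server.image_name (defined in MediaServer, not shown here):
+#   download_image(json_data['cover'], number, path)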
+
+def imageDownload(option, cover, number, c_word, path, multi_part, filepath, failed_folder):  # move to failed if the cover cannot be downloaded
+    if option == 'emby':  # name.jpg
+        if DownloadFileWithFilename(cover, number + c_word + '.jpg', path) == 'failed':
+            moveFailedFolder(filepath, failed_folder)
+            return
+        DownloadFileWithFilename(cover, number + c_word + '.jpg', path)
+        if not os.path.getsize(path + '/' + number + c_word + '.jpg') == 0:
+            print('[+]Image Downloaded!', path + '/' + number + c_word + '.jpg')
+            return
+        i = 1
+        while i <= int(config.retry):
+            if os.path.getsize(path + '/' + number + c_word + '.jpg') == 0:
+                print('[!]Image Download Failed! Trying again. [' + str(i) + '/' + str(config.retry) + ']')
+                DownloadFileWithFilename(cover, number + c_word + '.jpg', path)
+                i = i + 1
+                continue
+            else:
+                break
+        print('[+]Image Downloaded!', path + '/' + number + c_word + '.jpg')
+    elif option == 'plex':  # fanart.jpg
+        if DownloadFileWithFilename(cover, 'fanart.jpg', path) == 'failed':
+            moveFailedFolder(filepath, failed_folder)
+            return
+        DownloadFileWithFilename(cover, 'fanart.jpg', path)
+        if not os.path.getsize(path + '/fanart.jpg') == 0:
+            print('[+]Image Downloaded!', path + '/fanart.jpg')
+            return
+        i = 1
+        while i <= int(config.retry):
+            if os.path.getsize(path + '/fanart.jpg') == 0:
+                print('[!]Image Download Failed! Trying again. [' + str(i) + '/' + str(config.retry) + ']')
+                DownloadFileWithFilename(cover, 'fanart.jpg', path)
+                i = i + 1
+                continue
+            else:
+                break
+        print('[+]Image Downloaded!', path + '/fanart.jpg')
+    elif option == 'kodi':  # [name]-fanart.jpg
+        if DownloadFileWithFilename(cover, number + c_word + '-fanart.jpg', path) == 'failed':
+            moveFailedFolder(filepath, failed_folder)
+            return
+        DownloadFileWithFilename(cover, number + c_word + '-fanart.jpg', path)
+        if not os.path.getsize(path + '/' + number + c_word + '-fanart.jpg') == 0:
+            print('[+]Image Downloaded!', path + '/' + number + c_word + '-fanart.jpg')
+            return
+        i = 1
+        while i <= int(config.retry):
+            if os.path.getsize(path + '/' + number + c_word + '-fanart.jpg') == 0:
+                print('[!]Image Download Failed! Trying again. [' + str(i) + '/' + str(config.retry) + ']')
+                DownloadFileWithFilename(cover, number + c_word + '-fanart.jpg', path)
+                i = i + 1
+                continue
+            else:
+                break
+        print('[+]Image Downloaded!', path + '/' + number + c_word + '-fanart.jpg')
+
+
+def make_nfo_file(nfo, nfo_name, folder_path):
+    """
+    make xxx.nfo in a folder
+    :param nfo: nfo dict
+    :param nfo_name: file name without extension
+    :param folder_path: where to create the file, default temp_folder
+    :return:
+    """
+    title, studio, year, outline, runtime, director, actor_photo, release, number, cover, website = get_info(nfo)
+    naming_rule = nfo['naming_rule']
+    tag = nfo['tag']
+
+    path = folder_path
+    c_word = ''
+    cn_sub = ''
+    part = ''
+    path_file = path + "/" + nfo_name + c_word + ".nfo"
+ try:
+ if not os.path.exists(path):
+ os.makedirs(path)
+ if config.media_server == MediaServer.PLEX:
+            with open(path_file, "wt", encoding='UTF-8') as code:
+                print('<?xml version="1.0" encoding="UTF-8" standalone="yes"?>', file=code)
+                print("<movie>", file=code)
+                print("  <title>" + naming_rule + part + "</title>", file=code)
+                print("  <set>", file=code)
+                print("  </set>", file=code)
+                print("  <studio>" + studio + "+</studio>", file=code)
+                print("  <year>" + year + "</year>", file=code)
+                print("  <outline>" + outline + "</outline>", file=code)
+                print("  <plot>" + outline + "</plot>", file=code)
+                print("  <runtime>" + str(runtime).replace(" ", "") + "</runtime>", file=code)
+                print("  <director>" + director + "</director>", file=code)
+                print("  <poster>poster.jpg</poster>", file=code)
+                print("  <thumb>thumb.png</thumb>", file=code)
+                print("  <fanart>fanart.jpg</fanart>", file=code)
+                try:
+                    for key, value in actor_photo.items():
+                        print("  <actor>", file=code)
+                        print("    <name>" + key + "</name>", file=code)
+                        if not value == '':  # or actor_photo == []:
+                            print("    <thumb>" + value + "</thumb>", file=code)
+                        print("  </actor>", file=code)
+                except:
+                    pass
+                print("  <maker>" + studio + "</maker>", file=code)
+                print("  <label></label>", file=code)
+                if cn_sub == '1':
+                    print("  <tag>中文字幕</tag>", file=code)
+                try:
+                    for i in str(tag).strip("[ ]").replace("'", '').replace(" ", '').split(','):
+                        print("  <tag>" + i + "</tag>", file=code)
+                except:
+                    pass
+                try:
+                    for i in str(tag).strip("[ ]").replace("'", '').replace(" ", '').split(','):
+                        print("  <genre>" + i + "</genre>", file=code)
+                except:
+                    pass
+                if cn_sub == '1':
+                    print("  <genre>中文字幕</genre>", file=code)
+                print("  <num>" + number + "</num>", file=code)
+                print("  <release>" + release + "</release>", file=code)
+                print("  <cover>" + cover + "</cover>", file=code)
+                print("  <website>" + website + "</website>", file=code)
+                print("</movie>", file=code)
+            print("[+]Written! " + path + "/" + number + ".nfo")
+ elif config.media_server == MediaServer.EMBY:
+            with open(path_file, "wt", encoding='UTF-8') as code:
+                print('<?xml version="1.0" encoding="UTF-8" standalone="yes"?>', file=code)
+                print("<movie>", file=code)
+                print("  <title>" + naming_rule + part + "</title>", file=code)
+                print("  <set>", file=code)
+                print("  </set>", file=code)
+                print("  <studio>" + studio + "+</studio>", file=code)
+                print("  <year>" + year + "</year>", file=code)
+                print("  <outline>" + outline + "</outline>", file=code)
+                print("  <plot>" + outline + "</plot>", file=code)
+                print("  <runtime>" + str(runtime).replace(" ", "") + "</runtime>", file=code)
+                print("  <director>" + director + "</director>", file=code)
+                print("  <poster>" + number + c_word + ".png</poster>", file=code)
+                print("  <thumb>" + number + c_word + ".png</thumb>", file=code)
+                print("  <fanart>" + number + c_word + '.jpg' + "</fanart>", file=code)
+                try:
+                    for key, value in actor_photo.items():
+                        print("  <actor>", file=code)
+                        print("    <name>" + key + "</name>", file=code)
+                        if not value == '':  # or actor_photo == []:
+                            print("    <thumb>" + value + "</thumb>", file=code)
+                        print("  </actor>", file=code)
+                except:
+                    pass
+                print("  <maker>" + studio + "</maker>", file=code)
+                print("  <label></label>", file=code)
+                if cn_sub == '1':
+                    print("  <tag>中文字幕</tag>", file=code)
+                try:
+                    for i in tag:
+                        print("  <tag>" + i + "</tag>", file=code)
+                except:
+                    pass
+                try:
+                    for i in tag:
+                        print("  <genre>" + i + "</genre>", file=code)
+                except:
+                    pass
+                if cn_sub == '1':
+                    print("  <genre>中文字幕</genre>", file=code)
+                print("  <num>" + number + "</num>", file=code)
+                print("  <release>" + release + "</release>", file=code)
+                print("  <cover>" + cover + "</cover>", file=code)
+                print("  <website>" + website + "</website>", file=code)
+                print("</movie>", file=code)
+            print("[+]Written! " + path + "/" + number + c_word + ".nfo")
+ elif config.media_server == MediaServer.KODI:
+            with open(path_file, "wt", encoding='UTF-8') as code:
+                print('<?xml version="1.0" encoding="UTF-8" standalone="yes"?>', file=code)
+                print("<movie>", file=code)
+                print("  <title>" + naming_rule + part + "</title>", file=code)
+                print("  <set>", file=code)
+                print("  </set>", file=code)
+                print("  <studio>" + studio + "+</studio>", file=code)
+                print("  <year>" + year + "</year>", file=code)
+                print("  <outline>" + outline + "</outline>", file=code)
+                print("  <plot>" + outline + "</plot>", file=code)
+                print("  <runtime>" + str(runtime).replace(" ", "") + "</runtime>", file=code)
+                print("  <director>" + director + "</director>", file=code)
+                print("  <poster>" + number + c_word + "-poster.jpg</poster>", file=code)
+                print("  <fanart>" + number + c_word + '-fanart.jpg' + "</fanart>", file=code)
+                try:
+                    for key, value in actor_photo.items():
+                        print("  <actor>", file=code)
+                        print("    <name>" + key + "</name>", file=code)
+                        if not value == '':  # or actor_photo == []:
+                            print("    <thumb>" + value + "</thumb>", file=code)
+                        print("  </actor>", file=code)
+                except:
+                    pass
+                print("  <maker>" + studio + "</maker>", file=code)
+                print("  <label></label>", file=code)
+                if cn_sub == '1':
+                    print("  <tag>中文字幕</tag>", file=code)
+                try:
+                    for i in tag:
+                        print("  <tag>" + i + "</tag>", file=code)
+                except:
+                    pass
+                try:
+                    for i in tag:
+                        print("  <genre>" + i + "</genre>", file=code)
+                except:
+                    pass
+                if cn_sub == '1':
+                    print("  <genre>中文字幕</genre>", file=code)
+                print("  <num>" + number + "</num>", file=code)
+                print("  <release>" + release + "</release>", file=code)
+                print("  <cover>" + cover + "</cover>", file=code)
+                print("  <website>" + website + "</website>", file=code)
+                print("</movie>", file=code)
+            print("[+]Written! " + path + "/" + number + c_word + ".nfo")
+    except IOError as e:
+        print("[-]Write Failed! :" + str(e))
+        # moveFailedFolder(filepath, failed_folder)
+        return
+    except Exception as e:
+        print("[-]Write Failed! :" + str(e))
+        # moveFailedFolder(filepath, failed_folder)
+        return
+
+
+def PrintFiles(option, path, c_word, naming_rule, part, cn_sub, json_data, filepath, failed_folder, tag):
+ title, studio, year, outline, runtime, director, actor_photo, release, number, cover, website = get_info(json_data)
+ try:
+ if not os.path.exists(path):
+ os.makedirs(path)
+ if option == 'plex':
+            with open(path + "/" + number + c_word + ".nfo", "wt", encoding='UTF-8') as code:
+                print('<?xml version="1.0" encoding="UTF-8" standalone="yes"?>', file=code)
+                print("<movie>", file=code)
+                print("  <title>" + naming_rule + part + "</title>", file=code)
+                print("  <set>", file=code)
+                print("  </set>", file=code)
+                print("  <studio>" + studio + "+</studio>", file=code)
+                print("  <year>" + year + "</year>", file=code)
+                print("  <outline>" + outline + "</outline>", file=code)
+                print("  <plot>" + outline + "</plot>", file=code)
+                print("  <runtime>" + str(runtime).replace(" ", "") + "</runtime>", file=code)
+                print("  <director>" + director + "</director>", file=code)
+                print("  <poster>poster.jpg</poster>", file=code)
+                print("  <thumb>thumb.png</thumb>", file=code)
+                print("  <fanart>fanart.jpg</fanart>", file=code)
+                try:
+                    for key, value in actor_photo.items():
+                        print("  <actor>", file=code)
+                        print("    <name>" + key + "</name>", file=code)
+                        if not value == '':  # or actor_photo == []:
+                            print("    <thumb>" + value + "</thumb>", file=code)
+                        print("  </actor>", file=code)
+                except:
+                    pass
+                print("  <maker>" + studio + "</maker>", file=code)
+                print("  <label></label>", file=code)
+                if cn_sub == '1':
+                    print("  <tag>中文字幕</tag>", file=code)
+                try:
+                    for i in str(json_data['tag']).strip("[ ]").replace("'", '').replace(" ", '').split(','):
+                        print("  <tag>" + i + "</tag>", file=code)
+                except:
+                    pass
+                try:
+                    for i in str(json_data['tag']).strip("[ ]").replace("'", '').replace(" ", '').split(','):
+                        print("  <genre>" + i + "</genre>", file=code)
+                except:
+                    pass
+                if cn_sub == '1':
+                    print("  <genre>中文字幕</genre>", file=code)
+                print("  <num>" + number + "</num>", file=code)
+                print("  <release>" + release + "</release>", file=code)
+                print("  <cover>" + cover + "</cover>", file=code)
+                print("  <website>" + website + "</website>", file=code)
+                print("</movie>", file=code)
+            print("[+]Written! " + path + "/" + number + ".nfo")
+        elif option == 'emby':
+            with open(path + "/" + number + c_word + ".nfo", "wt", encoding='UTF-8') as code:
+                print('<?xml version="1.0" encoding="UTF-8" ?>', file=code)
+                print("<movie>", file=code)
+                print("    <title>" + naming_rule + part + "</title>", file=code)
+                print("    <set>", file=code)
+                print("    </set>", file=code)
+                print("    <studio>" + studio + "+</studio>", file=code)
+                print("    <year>" + year + "</year>", file=code)
+                print("    <outline>" + outline + "</outline>", file=code)
+                print("    <plot>" + outline + "</plot>", file=code)
+                print("    <runtime>" + str(runtime).replace(" ", "") + "</runtime>", file=code)
+                print("    <director>" + director + "</director>", file=code)
+                print("    <poster>" + number + c_word + ".png</poster>", file=code)
+                print("    <thumb>" + number + c_word + ".png</thumb>", file=code)
+                print("    <fanart>" + number + c_word + '.jpg' + "</fanart>", file=code)
+                try:
+                    for key, value in actor_photo.items():
+                        print("    <actor>", file=code)
+                        print("        <name>" + key + "</name>", file=code)
+                        if value != '':  # or actor_photo == []
+                            print("        <thumb>" + value + "</thumb>", file=code)
+                        print("    </actor>", file=code)
+                except:
+                    pass
+                print("    <maker>" + studio + "</maker>", file=code)
+                print("    <label></label>", file=code)
+                if cn_sub == '1':
+                    print("    <tag>中文字幕</tag>", file=code)
+                try:
+                    for i in tag:
+                        print("    <tag>" + i + "</tag>", file=code)
+                except:
+                    pass
+                try:
+                    for i in tag:
+                        print("    <genre>" + i + "</genre>", file=code)
+                except:
+                    pass
+                if cn_sub == '1':
+                    print("    <genre>中文字幕</genre>", file=code)
+                print("    <num>" + number + "</num>", file=code)
+                print("    <premiered>" + release + "</premiered>", file=code)
+                print("    <cover>" + cover + "</cover>", file=code)
+                print("    <website>" + website + "</website>", file=code)
+                print("</movie>", file=code)
+                print("[+]Written! " + path + "/" + number + c_word + ".nfo")
+        elif option == 'kodi':
+            with open(path + "/" + number + c_word + ".nfo", "wt", encoding='UTF-8') as code:
+                print('<?xml version="1.0" encoding="UTF-8" ?>', file=code)
+                print("<movie>", file=code)
+                print("    <title>" + naming_rule + part + "</title>", file=code)
+                print("    <set>", file=code)
+                print("    </set>", file=code)
+                print("    <studio>" + studio + "+</studio>", file=code)
+                print("    <year>" + year + "</year>", file=code)
+                print("    <outline>" + outline + "</outline>", file=code)
+                print("    <plot>" + outline + "</plot>", file=code)
+                print("    <runtime>" + str(runtime).replace(" ", "") + "</runtime>", file=code)
+                print("    <director>" + director + "</director>", file=code)
+                print("    <poster>" + number + c_word + "-poster.jpg</poster>", file=code)
+                print("    <fanart>" + number + c_word + '-fanart.jpg' + "</fanart>", file=code)
+                try:
+                    for key, value in actor_photo.items():
+                        print("    <actor>", file=code)
+                        print("        <name>" + key + "</name>", file=code)
+                        if value != '':  # or actor_photo == []
+                            print("        <thumb>" + value + "</thumb>", file=code)
+                        print("    </actor>", file=code)
+                except:
+                    pass
+                print("    <maker>" + studio + "</maker>", file=code)
+                print("    <label></label>", file=code)
+                if cn_sub == '1':
+                    print("    <tag>中文字幕</tag>", file=code)
+                try:
+                    for i in tag:
+                        print("    <tag>" + i + "</tag>", file=code)
+                except:
+                    pass
+                try:
+                    for i in tag:
+                        print("    <genre>" + i + "</genre>", file=code)
+                except:
+                    pass
+                if cn_sub == '1':
+                    print("    <genre>中文字幕</genre>", file=code)
+                print("    <num>" + number + "</num>", file=code)
+                print("    <premiered>" + release + "</premiered>", file=code)
+                print("    <cover>" + cover + "</cover>", file=code)
+                print("    <website>" + website + "</website>", file=code)
+                print("</movie>", file=code)
+                print("[+]Written! " + path + "/" + number + c_word + ".nfo")
+ except IOError as e:
+ print("[-]Write Failed!")
+ print(e)
+ moveFailedFolder(filepath, failed_folder)
+ return
+ except Exception as e1:
+ print(e1)
+ print("[-]Write Failed!")
+ moveFailedFolder(filepath, failed_folder)
+ return
+
+
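+# crop_image cuts the poster out of the downloaded cover; crop_style == 1 keeps
+# the right-hand part of the image (where the front cover usually sits).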
+def crop_image(crop_style, name, path):
+    try:
+        origin_image = Image.open(path + '/' + config.media_server.image_name(name))
+        if crop_style == 1:
+            cropped_image = origin_image.crop((origin_image.width / 1.9, 0, origin_image.width, origin_image.height))
+        else:
+            cropped_image = origin_image
+        cropped_image.save(path + '/' + config.media_server.poster_name(name))
+
+    except Exception as e:
+        print('[-]Cover cut failed: ' + str(e))
+
+
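+# cutImage is the legacy, option-string variant of crop_image: imagecut == 1
+# crops the right side of the cover, imagecut == 0 just re-saves it as the poster.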
+def cutImage(option, imagecut, path, number, c_word):
+    if option == 'plex':
+        if imagecut == 1:  # crop the right side of the cover: fanart.jpg -> poster.jpg
+            try:
+                img = Image.open(path + '/fanart.jpg')
+                w = img.width
+                h = img.height
+                img2 = img.crop((w / 1.9, 0, w, h))
+                img2.save(path + '/poster.jpg')
+            except:
+                print('[-]Cover cut failed!')
+        elif imagecut == 0:  # no cropping: save fanart.jpg as poster.jpg
+            img = Image.open(path + '/fanart.jpg')
+            img.save(path + '/poster.jpg')
+    elif option == 'emby':
+        if imagecut == 1:  # crop the right side of the cover: [name].jpg -> [name].png
+            try:
+                img = Image.open(path + '/' + number + c_word + '.jpg')
+                w = img.width
+                h = img.height
+                img2 = img.crop((w / 1.9, 0, w, h))
+                img2.save(path + '/' + number + c_word + '.png')
+            except:
+                print('[-]Cover cut failed!')
+        elif imagecut == 0:  # no cropping: [name].jpg -> [name].png
+            img = Image.open(path + '/' + number + c_word + '.jpg')
+            img.save(path + '/' + number + c_word + '.png')
+    elif option == 'kodi':
+        if imagecut == 1:  # crop the right side of the cover: [name]-fanart.jpg -> [name]-poster.jpg
+            try:
+                img = Image.open(path + '/' + number + c_word + '-fanart.jpg')
+                w = img.width
+                h = img.height
+                img2 = img.crop((w / 1.9, 0, w, h))
+                img2.save(path + '/' + number + c_word + '-poster.jpg')
+            except:
+                print('[-]Cover cut failed!')
+        elif imagecut == 0:  # no cropping: [name]-fanart.jpg -> [name]-poster.jpg
+            img = Image.open(path + '/' + number + c_word + '-fanart.jpg')
+            img = img.convert('RGB')  # JPEG output requires RGB mode
+            img.save(path + '/' + number + c_word + '-poster.jpg')
+
+
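+# pasteFileToFolder moves (or symlinks) the video into its destination folder
+# and brings any matching .srt/.ssa/.sub subtitle file along with it.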
+def pasteFileToFolder(filepath, path, number, c_word):  # file path, destination, number, suffix
+    houzhui = str(re.search(r'[.](avi|rmvb|wmv|mov|mp4|mkv|flv|ts|webm)$', filepath, re.IGNORECASE).group())
+    try:
+        if config.soft_link == '1':  # if soft_link=1, create a symlink instead of moving the file
+            os.symlink(filepath, path + '/' + number + c_word + houzhui)
+        else:
+            os.rename(filepath, path + '/' + number + c_word + houzhui)
+        if os.path.exists(config.search_folder + '/' + number + c_word + '.srt'):  # move subtitles
+            os.rename(config.search_folder + '/' + number + c_word + '.srt', path + '/' + number + c_word + '.srt')
+            print('[+]Sub moved!')
+        elif os.path.exists(config.search_folder + '/' + number + c_word + '.ssa'):
+            os.rename(config.search_folder + '/' + number + c_word + '.ssa', path + '/' + number + c_word + '.ssa')
+            print('[+]Sub moved!')
+        elif os.path.exists(config.search_folder + '/' + number + c_word + '.sub'):
+            os.rename(config.search_folder + '/' + number + c_word + '.sub', path + '/' + number + c_word + '.sub')
+            print('[+]Sub moved!')
+    except FileExistsError:
+        print('[-]File already exists! Please check your movie!')
+        print('[-]Move it to the root folder of the program and retry.')
+        return
+    except PermissionError:
+        print('[-]Error! Please run as administrator!')
+        return
+
+
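+# Mode-2 variant: same move/symlink logic, but multi-part releases get their
+# -CD1/-CD2 part suffix appended to the number first.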
+def pasteFileToFolder_mode2(filepath, path, multi_part, number, part, c_word):  # file path, number, suffix, destination
+    if multi_part == 1:
+        number += part  # number now carries the CD1-style part suffix
+    houzhui = str(re.search(r'[.](avi|rmvb|wmv|mov|mp4|mkv|flv|ts|webm)$', filepath, re.IGNORECASE).group())
+    try:
+        if config.soft_link == '1':
+            os.symlink(filepath, path + '/' + number + c_word + houzhui)
+        else:
+            os.rename(filepath, path + '/' + number + c_word + houzhui)
+        if os.path.exists(number + c_word + '.srt'):  # move subtitles
+            os.rename(number + c_word + '.srt', path + '/' + number + c_word + '.srt')
+            print('[+]Sub moved!')
+        elif os.path.exists(number + c_word + '.ass'):
+            os.rename(number + c_word + '.ass', path + '/' + number + c_word + '.ass')
+            print('[+]Sub moved!')
+        elif os.path.exists(number + c_word + '.sub'):
+            os.rename(number + c_word + '.sub', path + '/' + number + c_word + '.sub')
+            print('[+]Sub moved!')
+        print('[!]Success')
+    except FileExistsError:
+        print('[-]File already exists! Please check your movie!')
+        print('[-]Move it to the root folder of the program and retry.')
+        return
+    except PermissionError:
+        print('[-]Error! Please run as administrator!')
+        return
+
+
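+# copy_images_to_background_image duplicates the cover as Backdrop.jpg; Plex
+# additionally gets the poster copied to thumb.png.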
+def copy_images_to_background_image(name, path):
+ shutil.copy(path + "/" + config.media_server.image_name(name), path + "/Backdrop.jpg")
+ if config.media_server == MediaServer.PLEX:
+ shutil.copy(path + "/" + config.media_server.poster_name(name), path + '/thumb.png')
+
+
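+# Legacy option-string variant of copy_images_to_background_image.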
+def copyRenameJpgToBackdrop(option, path, number, c_word):
+    if option == 'plex':
+        shutil.copy(path + '/fanart.jpg', path + '/Backdrop.jpg')
+        shutil.copy(path + '/poster.jpg', path + '/thumb.png')
+    elif option == 'emby':
+        shutil.copy(path + '/' + number + c_word + '.jpg', path + '/Backdrop.jpg')
+    elif option == 'kodi':
+        shutil.copy(path + '/' + number + c_word + '-fanart.jpg', path + '/Backdrop.jpg')
+
+
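+# get_part extracts the -CD<n> part suffix from a multi-part filename.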
+def get_part(filepath, failed_folder):
+    try:
+        # match -CD1 and -cd1 style part suffixes
+        if re.search(r'-CD\d+', filepath, re.IGNORECASE):
+            return re.findall(r'-CD\d+', filepath, re.IGNORECASE)[0]
+    except:
+        print("[-]Failed! Please rename the file and try again!")
+        moveFailedFolder(filepath, failed_folder)
+        return
+
+
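+# debug_mode dumps the scraped metadata when debug_mode=1 is set in config.ini
+# (the long outline text is summarised as a character count).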
+def debug_mode(json_data):
+    try:
+        if config.debug_mode == '1':
+            print('[+] ---Debug info---')
+            for i, v in json_data.items():
+                if i == 'outline':
+                    print('[+] -', i, ' :', len(v), 'characters')
+                    continue
+                if i == 'actor_photo' or i == 'year':
+                    continue
+                print('[+] -', "%-11s" % i, ':', v)
+            print('[+] ---Debug info---')
+    except:
+        pass
+
+
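+# core_main currently only fetches and returns the scraped JSON for a number;
+# the folder/rename/nfo pipeline below is kept as commented-out scaffolding.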
+def core_main(number_th):
+    # ======================================================================= initialise the variables we need
+    multi_part = 0
+    part = ''
+    c_word = ''
+    option = ''
+    cn_sub = ''
+
+    # filepath = file_path  # path of the movie file
+    number = number_th
+
+    json_data = getDataFromJSON(number)  # fetch the scraped metadata for this number
+
+    # if json_data.get('number') != number:
+    #     fix issue #119
+    #     the root cause is that we normalize the search id;
+    #     PrintFiles() uses the normalized id from the website,
+    #     but pasteFileToFolder() still uses the raw input search id,
+    #     so the solution is: use the normalized search id
+    #     number = json_data["number"]
+    # imagecut = json_data['imagecut']
+    # tag = json_data['tag']
+    # ======================================================================= detect -C / -CD suffixes
+    # if '-CD' in filepath or '-cd' in filepath:
+    #     multi_part = 1
+    #     part = get_part(filepath, config.failed_folder)
+
+    # if '-c.' in filepath or '-C.' in filepath or '中文' in filepath or '字幕' in filepath:
+    #     cn_sub = '1'
+    #     c_word = '-C'  # suffix for movies with Chinese subtitles
+
+    # CreatFailedFolder(config.failed_folder)  # create the failed-output folder
+    # debug_mode(json_data)  # debug-mode check
+    return json_data
+    # path = creatFolder(config.success_folder, json_data['location_rule'], json_data, config.escape_literals)  # create the destination folder
+    # ======================================================================= scraping mode
+    # if config.program_mode == '1':
+    #     if multi_part == 1:
+    #         number += part  # number now carries the CD1-style suffix
+    #     smallCoverCheck(path, number, imagecut, json_data['cover_small'], c_word, option, filepath, config.failed_folder)  # check the small cover
+    #     imageDownload(option, json_data['cover'], number, c_word, path, multi_part, filepath, config.failed_folder)  # creatFolder returns the path for this number
+    #     cutImage(option, imagecut, path, number, c_word)  # crop the cover image
+    #     copyRenameJpgToBackdrop(option, path, number, c_word)
+    #     PrintFiles(option, path, c_word, json_data['naming_rule'], part, cn_sub, json_data, filepath, config.failed_folder, tag)  # write the .nfo file
+    #     pasteFileToFolder(filepath, path, number, c_word)  # move the file
+    # # ======================================================================= organise-only mode
+    # elif config.program_mode == '2':
+    #     pasteFileToFolder_mode2(filepath, path, multi_part, number, part, c_word)  # move the file
diff --git a/jav321.py b/jav321.py
deleted file mode 100644
index 1259553..0000000
--- a/jav321.py
+++ /dev/null
@@ -1,73 +0,0 @@
-import json
-from bs4 import BeautifulSoup
-from lxml import html
-from ADC_function import post_html
-
-
-def main(number: str) -> json:
- result = post_html(url="https://www.jav321.com/search", query={"sn": number})
- soup = BeautifulSoup(result.text, "html.parser")
- lx = html.fromstring(str(soup))
-
- if "/video/" in result.url:
- data = parse_info(soup=soup)
- dic = {
- "title": get_title(lx=lx),
- "studio": "",
- "year": data["release"][:4],
- "outline": get_outline(lx=lx),
- "director": "",
- "cover": get_cover(lx=lx),
- "imagecut": 1,
- "actor_photo": "",
- "website": result.url,
- "source": "jav321.py",
- **data,
- }
- else:
- dic = {}
-
- return json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'))
-
-
-def get_title(lx: html.HtmlElement) -> str:
- return lx.xpath("/html/body/div[2]/div[1]/div[1]/div[1]/h3/text()")[0].strip()
-
-
-def parse_info(soup: BeautifulSoup) -> dict:
-    data = str(soup.select_one("div.row > div.col-md-9")).split("<br/>")
-
- return {
- "actor": get_anchor_info(h=data[0]),
- "label": get_anchor_info(h=data[1]),
- "tag": get_anchor_info(h=data[2]),
- "number": get_text_info(h=data[3]),
- "release": get_text_info(h=data[4]),
- "runtime": get_text_info(h=data[5]),
- }
-
-
-def get_anchor_info(h: str) -> str:
- result = []
-
- data = BeautifulSoup(h, "html.parser").find_all("a", href=True)
- for d in data:
- result.append(d.text)
-
- return ",".join(result)
-
-
-def get_text_info(h: str) -> str:
- return h.split(": ")[1]
-
-
-def get_cover(lx: html.HtmlElement) -> str:
- return lx.xpath("/html/body/div[2]/div[2]/div[1]/p/a/img/@src")[0]
-
-
-def get_outline(lx: html.HtmlElement) -> str:
- return lx.xpath("/html/body/div[2]/div[1]/div[1]/div[2]/div[3]/div/text()")[0]
-
-
-if __name__ == "__main__":
- print(main("wmc-002"))
diff --git a/readme/._readme1.PNG b/readme/._readme1.PNG
new file mode 100755
index 0000000..0ea2059
Binary files /dev/null and b/readme/._readme1.PNG differ
diff --git a/readme/._readme2.PNG b/readme/._readme2.PNG
new file mode 100755
index 0000000..388f797
Binary files /dev/null and b/readme/._readme2.PNG differ
diff --git a/readme/._readme4.PNG b/readme/._readme4.PNG
new file mode 100755
index 0000000..eb24b60
Binary files /dev/null and b/readme/._readme4.PNG differ
diff --git a/readme/This is readms.md's images folder b/readme/This is readms.md's images folder
old mode 100644
new mode 100755
diff --git a/readme/flow_chart2.png b/readme/flow_chart2.png
old mode 100644
new mode 100755
diff --git a/readme/readme1.PNG b/readme/readme1.PNG
old mode 100644
new mode 100755
diff --git a/readme/readme2.PNG b/readme/readme2.PNG
old mode 100644
new mode 100755
diff --git a/readme/readme3.PNG b/readme/readme3.PNG
old mode 100644
new mode 100755
diff --git a/readme/readme4.PNG b/readme/readme4.PNG
old mode 100644
new mode 100755
diff --git a/readme/single.gif b/readme/single.gif
old mode 100644
new mode 100755
diff --git a/resource/This is readms.md's images folder b/resource/This is readms.md's images folder
new file mode 100755
index 0000000..d00491f
--- /dev/null
+++ b/resource/This is readms.md's images folder
@@ -0,0 +1 @@
+1
diff --git a/resource/flow_chart2.png b/resource/flow_chart2.png
new file mode 100755
index 0000000..4daf728
Binary files /dev/null and b/resource/flow_chart2.png differ
diff --git a/resource/readme1.PNG b/resource/readme1.PNG
new file mode 100755
index 0000000..b3d0a21
Binary files /dev/null and b/resource/readme1.PNG differ
diff --git a/resource/readme2.PNG b/resource/readme2.PNG
new file mode 100755
index 0000000..f002931
Binary files /dev/null and b/resource/readme2.PNG differ
diff --git a/resource/readme3.PNG b/resource/readme3.PNG
new file mode 100755
index 0000000..81e05cd
Binary files /dev/null and b/resource/readme3.PNG differ
diff --git a/resource/readme4.PNG b/resource/readme4.PNG
new file mode 100755
index 0000000..26a2cf4
Binary files /dev/null and b/resource/readme4.PNG differ
diff --git a/resource/ruquirments.txt b/resource/ruquirments.txt
new file mode 100755
index 0000000..97951df
--- /dev/null
+++ b/resource/ruquirments.txt
@@ -0,0 +1 @@
+pipenv install lxml bs4 pillow pyquery
\ No newline at end of file
diff --git a/resource/single.gif b/resource/single.gif
new file mode 100755
index 0000000..4b9c371
Binary files /dev/null and b/resource/single.gif differ
diff --git a/ruquirments.txt b/ruquirments.txt
deleted file mode 100644
index aa091a0..0000000
--- a/ruquirments.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-lxml
-bs4
-pillow
-pyquery
\ No newline at end of file
diff --git a/test.py b/test.py
new file mode 100755
index 0000000..5ebb19e
--- /dev/null
+++ b/test.py
@@ -0,0 +1,80 @@
+import re
+from itertools import groupby
+
+import fuckit
+from tenacity import retry, stop_after_delay, wait_fixed
+
+
+def go():
+    a = [1, 2, 3, 4, 5, 6]
+    # [print(x) for x in a]
+    a1 = groupby(a, key=lambda k: (k / 2))
+    for i in a1:
+        print(i)
+    for i in a1:  # groupby yields a one-shot iterator, so this second pass prints nothing
+        print(i)
+
+
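+# TryDo: sketch of an iterator-based retry helper; iterating calls func up to
+# `times` times and signals success/failure through StopIteration's value.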
+class TryDo:
+ def __init__(self, func, times=3):
+ self.tries = times
+ self.func = func
+
+ def __iter__(self):
+ self.currentTry = 1
+ return self
+
+    def __next__(self):
+        if self.currentTry > self.tries:
+            raise StopIteration(False)  # out of attempts: signal failure
+        else:
+            self.currentTry += 1
+            self.func()  # if this raises, the caller may iterate again for another attempt
+            raise StopIteration(True)  # func() succeeded: stop iterating
+
+ # def do(self):
+
+
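+# tenacity demo: retry every 2 seconds until 3 seconds have elapsed in total.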
+@retry(stop=stop_after_delay(3), wait=wait_fixed(2))
+def stop_after_10_s():
+    print("Retrying; tenacity gives up after 3 seconds")
+    raise Exception
+
+
+# f = iter( TryDo(do_something, 5))
+
+# stop_after_10_s()
+def errorfunc():
+ raise Exception
+
+
+def okfunc():
+ print("ok")
+
+
+# with fuckit:
+# errorfunc()
+# okfunc()
+# re.match()
+
+r = re.search(r'(?<=999)-?((?P([A-Z](?![A-Z])))|(?P\d(?!\d)))', "IPTD-999-B-彼女の姉貴とイケナイ関係-RIO", re.I)
+#
+print(r.groupdict())
+print(r.groupdict()['alpha'])
+print(r.group(2))
+
+line = "Cats are smarter than dogs"
+matchObj = re.search(r'(?<=a)(.*) are (.*?) .*', line, re.M | re.I)
+if matchObj:
+ print("matchObj.group() : ", matchObj.group())
+ print("matchObj.group(1) : ", matchObj.group(1))
+ print("matchObj.group(2) : ", matchObj.group(2))
+else:
+ print("No match!!")
+
+# print(r[-1])
+# print(newList)
diff --git a/update_check.json b/update_check.json
old mode 100644
new mode 100755