Compare commits
	
		
			1 Commits
		
	
	
		
			master
			...
			revert-53-
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|  | 566a5658b9 | 
							
								
								
									
										1
									
								
								.gitattributes
									
									
									
									
										vendored
									
									
								
							
							
						
						| @ -1 +0,0 @@ | ||||
| *.py text=auto eol=lf | ||||
							
								
								
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						| @ -1,4 +1,3 @@ | ||||
| *.DS_Store | ||||
| # Byte-compiled / optimized / DLL files | ||||
| __pycache__/ | ||||
| *.py[cod] | ||||
|  | ||||
							
								
								
									
										2
									
								
								.idea/.gitignore
									
									
									
										generated
									
									
										vendored
									
									
								
							
							
						
						| @ -1,2 +0,0 @@ | ||||
| # Default ignored files | ||||
| /workspace.xml | ||||
							
								
								
									
										8
									
								
								.idea/AV_Data_Capture.iml
									
									
									
										generated
									
									
									
								
							
							
						
						| @ -1,8 +0,0 @@ | ||||
| <?xml version="1.0" encoding="UTF-8"?> | ||||
| <module type="PYTHON_MODULE" version="4"> | ||||
|   <component name="NewModuleRootManager"> | ||||
|     <content url="file://$MODULE_DIR$" /> | ||||
|     <orderEntry type="jdk" jdkName="Python 3.8 (AV_Data_Capture)" jdkType="Python SDK" /> | ||||
|     <orderEntry type="sourceFolder" forTests="false" /> | ||||
|   </component> | ||||
| </module> | ||||
							
								
								
									
										19
									
								
								.idea/dictionaries/tanpengsccd.xml
									
									
									
										generated
									
									
									
								
							
							
						
						| @ -1,19 +0,0 @@ | ||||
| <component name="ProjectDictionaryState"> | ||||
|   <dictionary name="tanpengsccd"> | ||||
|     <words> | ||||
|       <w>avsox</w> | ||||
|       <w>emby</w> | ||||
|       <w>fanart</w> | ||||
|       <w>fanza</w> | ||||
|       <w>javbus</w> | ||||
|       <w>javdb</w> | ||||
|       <w>jellyfin</w> | ||||
|       <w>khtml</w> | ||||
|       <w>kodi</w> | ||||
|       <w>mgstage</w> | ||||
|       <w>plex</w> | ||||
|       <w>pondo</w> | ||||
|       <w>rmvb</w> | ||||
|     </words> | ||||
|   </dictionary> | ||||
| </component> | ||||
							
								
								
									
										6
									
								
								.idea/inspectionProfiles/profiles_settings.xml
									
									
									
										generated
									
									
									
								
							
							
						
						| @ -1,6 +0,0 @@ | ||||
| <component name="InspectionProjectProfileManager"> | ||||
|   <settings> | ||||
|     <option name="USE_PROJECT_PROFILE" value="false" /> | ||||
|     <version value="1.0" /> | ||||
|   </settings> | ||||
| </component> | ||||
							
								
								
									
										7
									
								
								.idea/misc.xml
									
									
									
										generated
									
									
									
								
							
							
						
						| @ -1,7 +0,0 @@ | ||||
| <?xml version="1.0" encoding="UTF-8"?> | ||||
| <project version="4"> | ||||
|   <component name="JavaScriptSettings"> | ||||
|     <option name="languageLevel" value="ES6" /> | ||||
|   </component> | ||||
|   <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.8 (AV_Data_Capture)" project-jdk-type="Python SDK" /> | ||||
| </project> | ||||
							
								
								
									
										8
									
								
								.idea/modules.xml
									
									
									
										generated
									
									
									
								
							
							
						
						| @ -1,8 +0,0 @@ | ||||
| <?xml version="1.0" encoding="UTF-8"?> | ||||
| <project version="4"> | ||||
|   <component name="ProjectModuleManager"> | ||||
|     <modules> | ||||
|       <module fileurl="file://$PROJECT_DIR$/.idea/AV_Data_Capture.iml" filepath="$PROJECT_DIR$/.idea/AV_Data_Capture.iml" /> | ||||
|     </modules> | ||||
|   </component> | ||||
| </project> | ||||
							
								
								
									
										6
									
								
								.idea/other.xml
									
									
									
										generated
									
									
									
								
							
							
						
						| @ -1,6 +0,0 @@ | ||||
| <?xml version="1.0" encoding="UTF-8"?> | ||||
| <project version="4"> | ||||
|   <component name="PySciProjectComponent"> | ||||
|     <option name="PY_SCI_VIEW_SUGGESTED" value="true" /> | ||||
|   </component> | ||||
| </project> | ||||
							
								
								
									
										6
									
								
								.idea/vcs.xml
									
									
									
										generated
									
									
									
								
							
							
						
						| @ -1,6 +0,0 @@ | ||||
| <?xml version="1.0" encoding="UTF-8"?> | ||||
| <project version="4"> | ||||
|   <component name="VcsDirectoryMappings"> | ||||
|     <mapping directory="$PROJECT_DIR$" vcs="Git" /> | ||||
|   </component> | ||||
| </project> | ||||
							
								
								
									
										144
									
								
								ADC_function.py
									
									
									
									
									
								
							
							
						
						| @ -7,110 +7,72 @@ import os | ||||
| import re | ||||
| import time | ||||
| import sys | ||||
| from lxml import etree | ||||
| import sys | ||||
| import io | ||||
| from ConfigApp import ConfigApp | ||||
| # sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True) | ||||
| # sys.setdefaultencoding('utf-8') | ||||
| 
 | ||||
| # config_file='config.ini' | ||||
| # config = ConfigParser() | ||||
| config_file='config.ini' | ||||
| config = ConfigParser() | ||||
| 
 | ||||
| # if os.path.exists(config_file): | ||||
| #     try: | ||||
| #         config.read(config_file, encoding='UTF-8') | ||||
| #     except: | ||||
| #         print('[-]Config.ini read failed! Please use the offical file!') | ||||
| # else: | ||||
| #     print('[+]config.ini: not found, creating...',end='') | ||||
| #     with open("config.ini", "wt", encoding='UTF-8') as code: | ||||
| #         print("[common]", file=code) | ||||
| #         print("main_mode = 1", file=code) | ||||
| #         print("failed_output_folder = failed", file=code) | ||||
| #         print("success_output_folder = JAV_output", file=code) | ||||
| #         print("", file=code) | ||||
| #         print("[proxy]",file=code) | ||||
| #         print("proxy=127.0.0.1:1081",file=code) | ||||
| #         print("timeout=10", file=code) | ||||
| #         print("retry=3", file=code) | ||||
| #         print("", file=code) | ||||
| #         print("[Name_Rule]", file=code) | ||||
| #         print("location_rule=actor+'/'+number",file=code) | ||||
| #         print("naming_rule=number+'-'+title",file=code) | ||||
| #         print("", file=code) | ||||
| #         print("[update]",file=code) | ||||
| #         print("update_check=1",file=code) | ||||
| #         print("", file=code) | ||||
| #         print("[media]", file=code) | ||||
| #         print("media_warehouse=emby", file=code) | ||||
| #         print("#emby plex kodi", file=code) | ||||
| #         print("", file=code) | ||||
| #         print("[escape]", file=code) | ||||
| #         print("literals=\\", file=code) | ||||
| #         print("", file=code) | ||||
| #         print("[movie_location]", file=code) | ||||
| #         print("path=", file=code) | ||||
| #         print("", file=code) | ||||
| #         print('.',end='') | ||||
| #     time.sleep(2) | ||||
| #     print('.') | ||||
| #     print('[+]config.ini: created!') | ||||
| #     print('[+]Please restart the program!') | ||||
| #     time.sleep(4) | ||||
| #     os._exit(0) | ||||
| #     try: | ||||
| #         config.read(config_file, encoding='UTF-8') | ||||
| #     except: | ||||
| #         print('[-]Config.ini read failed! Please use the offical file!') | ||||
| 
 | ||||
| config = ConfigApp() | ||||
| 
 | ||||
| 
 | ||||
| def get_network_settings(): | ||||
| if os.path.exists(config_file): | ||||
|     try: | ||||
|         proxy = config.proxy | ||||
|         timeout = int(config.timeout) | ||||
|         retry_count = int(config.retry) | ||||
|         assert timeout > 0 | ||||
|         assert retry_count > 0 | ||||
|         config.read(config_file, encoding='UTF-8') | ||||
|     except: | ||||
|         raise ValueError("[-]Proxy config error! Please check the config.") | ||||
|     return proxy, timeout, retry_count | ||||
| 
 | ||||
| def getDataState(json_data):  # 元数据获取失败检测 | ||||
|     if json_data['title'] == '' or json_data['title'] == 'None' or json_data['title'] == 'null': | ||||
|         return 0 | ||||
|     else: | ||||
|         return 1 | ||||
|         print('[-]Config.ini read failed! Please use the offical file!') | ||||
| else: | ||||
|     print('[+]config.ini: not found, creating...') | ||||
|     with open("config.ini", "wt", encoding='UTF-8') as code: | ||||
|         print("[proxy]",file=code) | ||||
|         print("proxy=127.0.0.1:1080",file=code) | ||||
|         print("timeout=10", file=code) | ||||
|         print("retry=3", file=code) | ||||
|         print("", file=code) | ||||
|         print("[Name_Rule]", file=code) | ||||
|         print("location_rule='JAV_output/'+actor+'/'+number",file=code) | ||||
|         print("naming_rule=number+'-'+title",file=code) | ||||
|         print("", file=code) | ||||
|         print("[update]",file=code) | ||||
|         print("update_check=1",file=code) | ||||
|         print("", file=code) | ||||
|         print("[media]", file=code) | ||||
|         print("media_warehouse=emby", file=code) | ||||
|         print("#emby or plex", file=code) | ||||
|         print("#plex only test!", file=code) | ||||
|         print("", file=code) | ||||
|         print("[directory_capture]", file=code) | ||||
|         print("switch=0", file=code) | ||||
|         print("directory=", file=code) | ||||
|         print("", file=code) | ||||
|         print("everyone switch:1=on, 0=off", file=code) | ||||
|     time.sleep(2) | ||||
|     print('[+]config.ini: created!') | ||||
|     try: | ||||
|         config.read(config_file, encoding='UTF-8') | ||||
|     except: | ||||
|         print('[-]Config.ini read failed! Please use the offical file!') | ||||
| 
 | ||||
| def ReadMediaWarehouse(): | ||||
|     return config.media_server | ||||
|     return config['media']['media_warehouse'] | ||||
| 
 | ||||
| def UpdateCheckSwitch(): | ||||
|     check=str(config.update_check) | ||||
|     check=str(config['update']['update_check']) | ||||
|     if check == '1': | ||||
|         return '1' | ||||
|     elif check == '0': | ||||
|         return '0' | ||||
|     elif check == '': | ||||
|         return '0' | ||||
| 
 | ||||
| def getXpathSingle(htmlcode,xpath): | ||||
|     html = etree.fromstring(htmlcode, etree.HTMLParser()) | ||||
|     result1 = str(html.xpath(xpath)).strip(" ['']") | ||||
|     return result1 | ||||
| 
 | ||||
| def get_html(url,cookies = None):#网页请求核心 | ||||
|     proxy, timeout, retry_count = get_network_settings() | ||||
|     try: | ||||
|         proxy = config['proxy']['proxy'] | ||||
|         timeout = int(config['proxy']['timeout']) | ||||
|         retry_count = int(config['proxy']['retry']) | ||||
|     except: | ||||
|         print('[-]Proxy config error! Please check the config.') | ||||
|     i = 0 | ||||
|     print(url) | ||||
|     while i < retry_count: | ||||
|         try: | ||||
|             if not proxy == '': | ||||
|                 proxies = {"http": proxy, "https": proxy} | ||||
|             if not str(config['proxy']['proxy']) == '': | ||||
|                 proxies = {"http": "http://" + proxy,"https": "https://" + proxy} | ||||
|                 headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3100.0 Safari/537.36'} | ||||
|                 getweb = requests.get(str(url), headers=headers, timeout=timeout, proxies=proxies, cookies=cookies) | ||||
|                 getweb = requests.get(str(url), headers=headers, timeout=timeout,proxies=proxies, cookies=cookies) | ||||
|                 getweb.encoding = 'utf-8' | ||||
|                 return getweb.text | ||||
|             else: | ||||
| @ -118,8 +80,16 @@ def get_html(url,cookies = None):#网页请求核心 | ||||
|                 getweb = requests.get(str(url), headers=headers, timeout=timeout, cookies=cookies) | ||||
|                 getweb.encoding = 'utf-8' | ||||
|                 return getweb.text | ||||
|         except Exception as e: | ||||
|             print(e) | ||||
|         except requests.exceptions.RequestException: | ||||
|             i += 1 | ||||
|             print('[-]Connect retry '+str(i)+'/'+str(retry_count)) | ||||
|         except requests.exceptions.ConnectionError: | ||||
|             i += 1 | ||||
|             print('[-]Connect retry '+str(i)+'/'+str(retry_count)) | ||||
|         except requests.exceptions.ProxyError: | ||||
|             i += 1 | ||||
|             print('[-]Connect retry '+str(i)+'/'+str(retry_count)) | ||||
|         except requests.exceptions.ConnectTimeout: | ||||
|             i += 1 | ||||
|             print('[-]Connect retry '+str(i)+'/'+str(retry_count)) | ||||
|     print('[-]Connect Failed! Please check your Proxy or Network!') | ||||
|  | ||||
| @ -4,413 +4,150 @@ | ||||
| import glob | ||||
| import os | ||||
| import time | ||||
| import fuckit | ||||
| from tenacity import retry, stop_after_delay, wait_fixed | ||||
| import re | ||||
| import sys | ||||
| from ADC_function import * | ||||
| import json | ||||
| import shutil | ||||
| import itertools | ||||
| import argparse | ||||
| from pathlib import Path | ||||
| from configparser import ConfigParser | ||||
| os.chdir(os.getcwd()) | ||||
| 
 | ||||
| from core import * | ||||
| from ConfigApp import ConfigApp | ||||
| from PathNameProcessor import PathNameProcessor | ||||
| # ============global var=========== | ||||
| 
 | ||||
| # TODO 封装聚合解耦:CORE | ||||
| # TODO (学习)统一依赖管理工具 | ||||
| # TODO 不同媒体服务器尽量兼容统一一种元数据 如nfo 海报等(emby,jellyfin,plex) | ||||
| # TODO 字幕整理功能 文件夹中读取所有字幕 并提番号放入对应缓存文件夹中TEMP | ||||
| version='1.3' | ||||
| 
 | ||||
| config = ConfigApp() | ||||
| config = ConfigParser() | ||||
| config.read(config_file, encoding='UTF-8') | ||||
| 
 | ||||
| Platform = sys.platform | ||||
| 
 | ||||
| def safe_list_get(list_in, idx, default=None): | ||||
|     """ | ||||
|     数组安全取值 | ||||
|     :param list_in: | ||||
|     :param idx: | ||||
|     :param default: | ||||
|     :return: | ||||
|     """ | ||||
|     try: | ||||
|         return list_in[idx] | ||||
|     except IndexError: | ||||
|         return default | ||||
| # ==========global var end========= | ||||
| 
 | ||||
| 
 | ||||
| def UpdateCheck(version): | ||||
| def UpdateCheck(): | ||||
|     if UpdateCheckSwitch() == '1': | ||||
|         html2 = get_html('https://raw.githubusercontent.com/yoshiko2/AV_Data_Capture/master/update_check.json') | ||||
|         html = json.loads(str(html2)) | ||||
| 
 | ||||
|         if not version == html['version']: | ||||
|             print('[*]                  * New update ' + html['version'] + ' *') | ||||
|             print('[*]                     ↓ Download ↓') | ||||
|             print('[*]           * New update ' + html['version'] + ' *') | ||||
|             print('[*]             * Download *') | ||||
|             print('[*] ' + html['download']) | ||||
|             print('[*]======================================================') | ||||
|             print('[*]=====================================') | ||||
|     else: | ||||
|         print('[+]Update Check disabled!') | ||||
| 
 | ||||
| 
 | ||||
| def argparse_get_file(): | ||||
|     parser = argparse.ArgumentParser() | ||||
|     parser.add_argument("file", default='', nargs='?', help="Write the file path on here") | ||||
|     args = parser.parse_args() | ||||
|     if args.file == '': | ||||
|         return '' | ||||
|     else: | ||||
|         return args.file | ||||
| 
 | ||||
| 
 | ||||
| def movie_lists(escape_folders): | ||||
|     escape_folders = re.split('[,,]', escape_folders) | ||||
|     total = [] | ||||
| 
 | ||||
|     for root, dirs, files in os.walk(config.search_folder): | ||||
|         if root in escape_folders: | ||||
|             continue | ||||
|         for file in files: | ||||
|             if re.search(PathNameProcessor.pattern_of_file_name_suffixes, file, re.IGNORECASE): | ||||
|                 path = os.path.join(root, file) | ||||
|                 total.append(path) | ||||
| def movie_lists(): | ||||
|     global exclude_directory_1 | ||||
|     global exclude_directory_2 | ||||
|     directory = config['directory_capture']['directory'] | ||||
|     total=[] | ||||
|     file_type = ['mp4','avi','rmvb','wmv','mov','mkv','flv','ts'] | ||||
|     exclude_directory_1 = config['common']['failed_output_folder'] | ||||
|     exclude_directory_2 = config['common']['success_output_folder'] | ||||
|     if directory=='*': | ||||
|         remove_total = [] | ||||
|         for o in file_type: | ||||
|             remove_total += glob.glob(r"./" + exclude_directory_1 + "/*." + o) | ||||
|             remove_total += glob.glob(r"./" + exclude_directory_2 + "/*." + o) | ||||
|         for i in os.listdir(os.getcwd()): | ||||
|             for a in file_type: | ||||
|                 total += glob.glob(r"./" + i + "/*." + a) | ||||
|         for b in remove_total: | ||||
|             total.remove(b) | ||||
|         return total | ||||
|     for a in file_type: | ||||
|         total += glob.glob(r"./" + directory + "/*." + a) | ||||
|     return total | ||||
| def CreatFailedFolder(): | ||||
|     if not os.path.exists('failed/'):  # 新建failed文件夹 | ||||
|         try: | ||||
|             os.makedirs('failed/') | ||||
|         except: | ||||
|             print("[-]failed!can not be make folder 'failed'\n[-](Please run as Administrator)") | ||||
|             os._exit(0) | ||||
| def lists_from_test(custom_nuber): #电影列表 | ||||
|     a=[] | ||||
|     a.append(custom_nuber) | ||||
|     return a | ||||
| def CEF(path): | ||||
|     try: | ||||
|         files = os.listdir(path)  # 获取路径下的子文件(夹)列表 | ||||
|         for file in files: | ||||
|             os.removedirs(path + '/' + file)  # 删除这个空文件夹 | ||||
|             print('[+]Deleting empty folder', path + '/' + file) | ||||
|     except: | ||||
|         a='' | ||||
| def rreplace(self, old, new, *max): | ||||
| #从右开始替换文件名中内容,源字符串,将被替换的子字符串, 新字符串,用于替换old子字符串,可选字符串, 替换不超过 max 次 | ||||
|     count = len(self) | ||||
|     if max and str(max[0]).isdigit(): | ||||
|         count = max[0] | ||||
|     return new.join(self.rsplit(old, count)) | ||||
| def getNumber(filepath): | ||||
|     filepath = filepath.replace('.\\','') | ||||
|     try:  # 普通提取番号 主要处理包含减号-的番号 | ||||
|         filepath = filepath.replace("_", "-") | ||||
|         filepath.strip('22-sht.me').strip('-HD').strip('-hd') | ||||
|         filename = str(re.sub("\[\d{4}-\d{1,2}-\d{1,2}\] - ", "", filepath))  # 去除文件名中时间 | ||||
|         try: | ||||
|             file_number = re.search('\w+-\d+', filename).group() | ||||
|         except:  # 提取类似mkbd-s120番号 | ||||
|             file_number = re.search('\w+-\w+\d+', filename).group() | ||||
|         return file_number | ||||
|     except:  # 提取不含减号-的番号 | ||||
|         try: | ||||
|             filename = str(re.sub("ts6\d", "", filepath)).strip('Tokyo-hot').strip('tokyo-hot') | ||||
|             filename = str(re.sub(".*?\.com-\d+", "", filename)).replace('_', '') | ||||
|             file_number = str(re.search('\w+\d{4}', filename).group(0)) | ||||
|             return file_number | ||||
|         except:  # 提取无减号番号 | ||||
|             filename = str(re.sub("ts6\d", "", filepath))  # 去除ts64/265 | ||||
|             filename = str(re.sub(".*?\.com-\d+", "", filename)) | ||||
|             file_number = str(re.match('\w+', filename).group()) | ||||
|             file_number = str(file_number.replace(re.match("^[A-Za-z]+", file_number).group(),re.match("^[A-Za-z]+", file_number).group() + '-')) | ||||
|             return file_number | ||||
| 
 | ||||
| def RunCore(): | ||||
|     if Platform == 'win32': | ||||
|         if os.path.exists('core.py'): | ||||
|             os.system('python core.py' + '   "' + i + '" --number "' + getNumber(i) + '"')  # 从py文件启动(用于源码py) | ||||
|         elif os.path.exists('core.exe'): | ||||
|             os.system('core.exe' + '   "' + i + '" --number "' + getNumber(i) + '"')  # 从exe启动(用于EXE版程序) | ||||
|         elif os.path.exists('core.py') and os.path.exists('core.exe'): | ||||
|             os.system('python core.py' + '   "' + i + '" --number "' + getNumber(i) + '"')  # 从py文件启动(用于源码py) | ||||
|     else: | ||||
|         if os.path.exists('core.py'): | ||||
|             os.system('python3 core.py' + '   "' + i + '" --number "' + getNumber(i) + '"')  # 从py文件启动(用于源码py) | ||||
|         elif os.path.exists('core.exe'): | ||||
|             os.system('core.exe' + '   "' + i + '" --number "' + getNumber(i) + '"')  # 从exe启动(用于EXE版程序) | ||||
|         elif os.path.exists('core.py') and os.path.exists('core.exe'): | ||||
|             os.system('python3 core.py' + '   "' + i + '" --number "' + getNumber(i) + '"')  # 从py文件启动(用于源码py) | ||||
| 
 | ||||
| # def CEF(path): | ||||
| #     try: | ||||
| #         files = os.listdir(path)  # 获取路径下的子文件(夹)列表 | ||||
| #         for file in files: | ||||
| #             os.removedirs(path + '/' + file)  # 删除这个空文件夹 | ||||
| #             print('[+]Deleting empty folder', path + '/' + file) | ||||
| #     except: | ||||
| #         a = '' | ||||
| # | ||||
| 
 | ||||
| 
 | ||||
| def get_numbers(paths): | ||||
|     """提取对应路径的番号+集数""" | ||||
| 
 | ||||
|     def get_number(filepath, absolute_path=False): | ||||
|         """ | ||||
|         获取番号,集数 | ||||
|         :param filepath: | ||||
|         :param absolute_path: | ||||
|         :return: | ||||
|         """ | ||||
|         name = filepath.upper()  # 转大写 | ||||
|         if absolute_path: | ||||
|             name = name.replace('\\', '/') | ||||
|         # 移除干扰字段 | ||||
|         name = PathNameProcessor.remove_distractions(name) | ||||
|         # 抽取 文件路径中可能存在的尾部集数,和抽取尾部集数的后的文件路径 | ||||
|         suffix_episode, name = PathNameProcessor.extract_suffix_episode(name) | ||||
|         # 抽取 文件路径中可能存在的 番号后跟随的集数 和 处理后番号 | ||||
|         episode_behind_code, code_number = PathNameProcessor.extract_code(name) | ||||
|         # 无番号 则设置空字符 | ||||
|         code_number = code_number if code_number else '' | ||||
|         # 优先取尾部集数,无则取番号后的集数(几率低),都无则为空字符 | ||||
|         episode = suffix_episode if suffix_episode else episode_behind_code if episode_behind_code else '' | ||||
| 
 | ||||
|         return code_number, episode | ||||
| 
 | ||||
|     maps = {} | ||||
|     for path in paths: | ||||
|         number, episode = get_number(path) | ||||
|         maps[path] = (number, episode) | ||||
| 
 | ||||
|     return maps | ||||
| 
 | ||||
| 
 | ||||
| def create_folder(paths): | ||||
|     for path_to_make in paths: | ||||
|         if path_to_make: | ||||
|             try: | ||||
|                 os.makedirs(path_to_make) | ||||
|             except FileExistsError as e: | ||||
|                 # name = f'{folder=}'.split('=')[0].split('.')[-1] | ||||
|                 print(path_to_make + " 已经存在") | ||||
|                 pass | ||||
|             except Exception as exception: | ||||
|                 print('! 创建文件夹 ' + path_to_make + ' 失败,文件夹路径错误或权限不够') | ||||
|                 raise exception | ||||
|         else: | ||||
|             raise Exception('!创建的文件夹路径为空,请确认') | ||||
| 
 | ||||
| 
 | ||||
| if __name__ == '__main__': | ||||
|     version = '2.8.2' | ||||
| 
 | ||||
|     print('[*]================== AV Data Capture ===================') | ||||
|     print('[*]                    Version ' + version) | ||||
|     print('[*]======================================================') | ||||
| 
 | ||||
|     # UpdateCheck(version) | ||||
| 
 | ||||
|     CreatFailedFolder(config.failed_folder) | ||||
| if __name__ =='__main__': | ||||
|     print('[*]===========AV Data Capture===========') | ||||
|     print('[*]             Version '+version) | ||||
|     print('[*]=====================================') | ||||
|     CreatFailedFolder() | ||||
|     UpdateCheck() | ||||
|     os.chdir(os.getcwd()) | ||||
| 
 | ||||
|     # 创建文件夹 | ||||
|     create_folder([config.failed_folder, config.search_folder, config.temp_folder]) | ||||
|     count = 0 | ||||
|     count_all = str(len(movie_lists())) | ||||
|     print('[+]Find',str(len(movie_lists())),'movies') | ||||
|     for i in movie_lists(): #遍历电影列表 交给core处理 | ||||
|         count = count + 1 | ||||
|         percentage = str(count/int(count_all)*100)[:4]+'%' | ||||
|         print('[!] - '+percentage+' ['+str(count)+'/'+count_all+'] -') | ||||
|         try: | ||||
|             print("[!]Making Data for   [" + i + "], the number is [" + getNumber(i) + "]") | ||||
|             RunCore() | ||||
|             print("[*]=====================================") | ||||
|         except:  # 番号提取异常 | ||||
|             print('[-]' + i + ' Cannot catch the number :') | ||||
|             print('[-]Move ' + i + ' to failed folder') | ||||
|             shutil.move(i, str(os.getcwd()) + '/' + 'failed/') | ||||
|             continue | ||||
| 
 | ||||
|     # temp 文件夹中infos放 番号json信息,pics中放图片信息 | ||||
|     path_infos = config.temp_folder + '/infos' | ||||
|     path_pics = config.temp_folder + '/pics' | ||||
| 
 | ||||
|     create_folder([path_infos, path_pics]) | ||||
| 
 | ||||
|     # 遍历搜索目录下所有视频的路径 | ||||
|     movie_list = movie_lists(config.escape_folder) | ||||
| 
 | ||||
|     # 以下是从文本中提取测试的数据 | ||||
|     # f = open('TestPathNFO.txt', 'r') | ||||
|     # f = open('TestPathSpecial.txt', 'r') | ||||
|     # movie_list = [line[:-1] for line in f.readlines()] | ||||
|     # f.close() | ||||
| 
 | ||||
|     # 获取 番号,集数,路径  的字典->list | ||||
|     code_ep_paths = [[codeEposode[0], codeEposode[1], path] for path, codeEposode in get_numbers(movie_list).items()] | ||||
|     [print(i) for i in code_ep_paths] | ||||
|     #  按番号分组片子列表(重点),用于寻找相同番号的片子 | ||||
|     ''' | ||||
|     这里利用pandas分组 "https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html" | ||||
|      | ||||
|     ''' | ||||
|     # # 设置打印时显示所有列 | ||||
|     # pd.set_option('display.max_columns', None) | ||||
|     # # 显示所有行 | ||||
|     # pd.set_option('display.max_rows', None) | ||||
|     # # 设置value的显示长度为100,默认为50 | ||||
|     # pd.set_option('max_colwidth', 30) | ||||
|     # # 创建框架 | ||||
|     # df = pd.DataFrame(code_ep_paths, columns=('code', 'ep', 'path')) | ||||
|     # # 以番号分组 | ||||
|     # groupedCode_code_ep_paths = df.groupby(['code']) | ||||
|     # # print(df.groupby(['code', 'ep']).describe().unstack()) | ||||
|     # grouped_code_ep = df.groupby(['code', 'ep'])['path'] | ||||
|     # | ||||
|     sorted_code_list = sorted(code_ep_paths, key=lambda code_ep_path: code_ep_path[0]) | ||||
|     group_code_list = itertools.groupby(sorted_code_list, key=lambda code_ep_path: code_ep_path[0]) | ||||
| 
 | ||||
| 
 | ||||
|     def group_code_list_to_dict(group_code_list): | ||||
|         data_dict = {} | ||||
|         for code, code_ep_path_group in group_code_list: | ||||
|             code_ep_path_list = list(code_ep_path_group) | ||||
|             eps_of_code = {} | ||||
|             group_ep_list = itertools.groupby(code_ep_path_list, key=lambda code_ep_path: code_ep_path[1]) | ||||
|             for ep, group_ep_group in group_ep_list: | ||||
|                 group_ep_list = list(group_ep_group) | ||||
|                 eps_of_code[ep] = [code_ep_path[2] for code_ep_path in group_ep_list] | ||||
|             data_dict[code] = eps_of_code | ||||
| 
 | ||||
|         return data_dict | ||||
| 
 | ||||
| 
 | ||||
|     def print_same_code_ep_path(data_dict_in): | ||||
|         for code_in in data_dict_in: | ||||
|             ep_path_list = data_dict_in[code_in] | ||||
|             if len(ep_path_list) > 1: | ||||
|                 print('--' * 60) | ||||
|                 print("|" + (code_in if code_in else 'unknown') + ":") | ||||
| 
 | ||||
|                 # group_ep_list = itertools.groupby(code_ep_path_list.items(), key=lambda code_ep_path: code_ep_path[0]) | ||||
|                 for ep in ep_path_list: | ||||
|                     path_list = ep_path_list[ep] | ||||
|                     print('--' * 12) | ||||
|                     ep = ep if ep else ' ' | ||||
|                     if len(path_list) == 1: | ||||
|                         print('|           集数:' + ep + ' 文件: ' + path_list[0]) | ||||
|                     else: | ||||
|                         print('|           集数:' + ep + ' 文件: ') | ||||
|                         for path in path_list: | ||||
|                             print('|                       ' + path) | ||||
| 
 | ||||
|             else: | ||||
|                 pass | ||||
| 
 | ||||
| 
 | ||||
|     # 分好组的数据 {code:{ep:[path]}} | ||||
|     data_dict_groupby_code_ep = group_code_list_to_dict(group_code_list) | ||||
| 
 | ||||
|     print('--' * 100) | ||||
|     print("找到影片数量:" + str(len(movie_list))) | ||||
|     print("合计番号数量:" + str(len(data_dict_groupby_code_ep)) + "  (多个相同番号的影片只统计一个,不能识别的番号 都统一为'unknown')") | ||||
|     print('Warning:!!!! 以下为相同番号的电影明细') | ||||
|     print('◤' + '--' * 80) | ||||
|     print_same_code_ep_path(data_dict_groupby_code_ep) | ||||
|     print('◣' + '--' * 80) | ||||
| 
 | ||||
|     isContinue = input('任意键继续? N 退出 \n') | ||||
|     if isContinue.strip(' ') == "N": | ||||
|         exit(1) | ||||
| 
 | ||||
| 
 | ||||
|     # ========== 野鸡番号拖动 ========== | ||||
|     # number_argparse = argparse_get_file() | ||||
|     # if not number_argparse == '': | ||||
|     #     print("[!]Making Data for   [" + number_argparse + "], the number is [" + getNumber(number_argparse, | ||||
|     #                                                                                         absolute_path=True) + "]") | ||||
|     #     nfo = core_main(number_argparse, getNumber(number_argparse, absolute_path=True)) | ||||
|     #     print("[*]======================================================") | ||||
|     #     CEF(config.success_folder) | ||||
|     #     CEF(config.failed_folder) | ||||
|     #     print("[+]All finished!!!") | ||||
|     #     input("[+][+]Press enter key exit, you can check the error messge before you exit.") | ||||
|     #     os._exit(0) | ||||
|     # ========== 野鸡番号拖动 ========== | ||||
| 
 | ||||
|     def download_code_infos(code_list, is_read_cache=True): | ||||
|         """ | ||||
|          遍历按番号分组的集合,刮取番号信息并缓存 | ||||
| 
 | ||||
|         :param is_read_cache: 是否读取缓存数据 | ||||
|         :param code_list: | ||||
|         :return: {code:nfo} | ||||
|         """ | ||||
|         count_all_grouped = len(code_list) | ||||
|         count = 0 | ||||
|         code_info_dict = {} | ||||
| 
 | ||||
|         for code in code_list: | ||||
|             count = count + 1 | ||||
|             percentage = str(count / int(count_all_grouped) * 100)[:4] + '%' | ||||
|             print('[!] - ' + percentage + ' [' + str(count) + '/' + str(count_all_grouped) + '] -') | ||||
|             try: | ||||
|                 print("[!]搜刮数据 [" + code + "]") | ||||
|                 if code: | ||||
|                     # 创建番号的文件夹 | ||||
|                     file_path = path_infos + '/' + code + '.json' | ||||
|                     nfo = {} | ||||
|                     # 读取缓存信息,如果没有则联网搜刮 | ||||
| 
 | ||||
|                     path = Path(file_path) | ||||
|                     if is_read_cache and (path.exists() and path.is_file() and path.stat().st_size > 0): | ||||
|                         print('找到缓存信息') | ||||
|                         with open(file_path) as fp: | ||||
|                             nfo = json.load(fp) | ||||
|                     else: | ||||
| 
 | ||||
|                         # 核心功能 - 联网抓取信息字典 | ||||
|                         print('联网搜刮') | ||||
|                         nfo = core_main(code) | ||||
|                         print('正在写入', end='') | ||||
| 
 | ||||
|                         # 把缓存信息写入缓存文件夹中,有时会设备占用而失败,重试即可 | ||||
|                         @retry(stop=stop_after_delay(3), wait=wait_fixed(2)) | ||||
|                         def read_file(): | ||||
|                             with open(file_path, 'w') as fp: | ||||
|                                 json.dump(nfo, fp) | ||||
| 
 | ||||
|                         read_file() | ||||
|                         print('完成!') | ||||
|                     # 将番号信息放入字典 | ||||
|                     code_info_dict[code] = nfo | ||||
|                     print("[*]======================================================") | ||||
| 
 | ||||
|             except Exception as e:  # 番号的信息获取失败 | ||||
|                 code_info_dict[code] = '' | ||||
|                 print("找不到信息:" + code + ',Reason:' + str(e)) | ||||
| 
 | ||||
|                 # if config.soft_link: | ||||
|                 #     print('[-]Link', file_path_name, 'to failed folder') | ||||
|                 #     os.symlink(file_path_name, config.failed_folder + '/') | ||||
|                 # else: | ||||
|                 #     try: | ||||
|                 #         print('[-]Move ' + file_path_name + ' to failed folder:' + config.failed_folder) | ||||
|                 #         shutil.move(file_path_name, config.failed_folder + '/') | ||||
|                 #     except FileExistsError: | ||||
|                 #         print('[!]File exists in failed!') | ||||
|                 #     except: | ||||
|                 #         print('[+]skip') | ||||
|                 continue | ||||
|         return code_info_dict | ||||
| 
 | ||||
| 
 | ||||
    print('----------------------------------')
    # Scrape (or load cached) metadata for every grouped code.
    code_infos = download_code_infos(data_dict_groupby_code_ep)
    print("----未找到番号数据的番号----")
    # NOTE(review): the inner print() runs per code; the outer print() then
    # shows a list of Nones returned by the comprehension.
    print([print(code) for code in code_infos if code_infos[code] == ''])
    print("-------------------------")
| 
 | ||||
| 
 | ||||
|     def download_images_of_nfos(code_info_dict): | ||||
|         """ | ||||
|         遍历番号信息,下载番号电影的海报,图片 | ||||
|         :param code_info_dict: | ||||
|         :return: 无图片的信息的番号 | ||||
|         """ | ||||
| 
 | ||||
|         code_list_empty_image = [] | ||||
|         for code in code_info_dict: | ||||
|             nfo = code_info_dict[code] | ||||
|             if len(nfo.keys()) == 0: | ||||
|                 code_list_empty_image.append(code) | ||||
|                 continue | ||||
| 
 | ||||
|             code_pics_folder_to_save = path_pics + '/' + code | ||||
|             # 1 创建 番号文件夹 | ||||
|             os.makedirs(code_pics_folder_to_save, exist_ok=True) | ||||
|             #  下载缩略图 | ||||
|             if nfo['imagecut'] == 3:  # 3 是缩略图 | ||||
|                 path = Path(code_pics_folder_to_save + '/' + 'thumb.png') | ||||
|                 if path.exists() and path.is_file() and path.stat().st_size > 0: | ||||
|                     print(code + ':缩略图已有缓存') | ||||
|                 else: | ||||
|                     print(code + ':缩略图下载中...') | ||||
|                     download_file(nfo['cover_small'], code_pics_folder_to_save, 'thumb.png') | ||||
|                     print(code + ':缩略图下载完成') | ||||
|             #  下载海报 | ||||
|             path = Path(code_pics_folder_to_save + '/' + 'poster.png') | ||||
|             if path.exists() and path.is_file() and path.stat().st_size > 0: | ||||
|                 print(code + ':海报已有缓存') | ||||
|             else: | ||||
|                 print(code + ':海报下载中...') | ||||
|                 download_file(nfo['cover'], code_pics_folder_to_save, 'poster.png') | ||||
|                 print(code + ':海报下载完成') | ||||
|         return code_list_empty_image | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
    code_list_empty = download_images_of_nfos(code_infos)
    print("----未找到集数的番号----")
    # NOTE: prints each code, then a list of Nones from the comprehension.
    print([print(code) for code in code_list_empty])
    print("------搜刮未找到集数的番号------")
    # Re-scrape the codes that had no usable metadata, bypassing the cache.
    code_infos_of_no_ep = download_code_infos(code_list_empty, is_read_cache=False)
    print("----还是未找到番号数据的番号----")
    print([print(code) for code in code_infos_of_no_ep if code_infos_of_no_ep[code] == ''])
    print("----------------------")
    # Start processing.
    # # 2 创建缩略图海报
    # if nfo['imagecut'] == 3:  # 3 是缩略图
    #     download_cover_file(nfo['cover_small'], code, code_pics_folder_to_save)
    # # 3 创建图
    # download_image(nfo['cover'], code, code_pics_folder_to_save)
    # # 4 剪裁
    # crop_image(nfo['imagecut'], code, code_pics_folder_to_save)
    # # 5 背景图
    # copy_images_to_background_image(code, code_pics_folder_to_save)
    # 6 Create name.nfo (not needed; convert the cached JSON in infos/ on demand).
    # make_nfo_file(nfo, code, temp_path_to_save)
    # Same-code handling: append -CD[X] by episode; split by video format and size.
    # TODO Mode 1 scraping: add nfo, cover, screenshots, etc.
    # 6 Create name.nfo (not needed; convert the cached JSON in infos/ on demand).
    # NOTE(review): `nfo`, `code` and `temp_path_to_save` are not defined in this
    # scope -- this looks like a leftover of the commented block above and will
    # raise NameError when reached. TODO confirm and remove or fix.
    make_nfo_file(nfo, code, temp_path_to_save)
    # TODO Mode 2 organizing: move videos/subtitles by actor, studio, censored/uncensored, etc.

    # if config.program_mode == '1':
    #     if multi_part == 1:
    #         number += part  # 这时number会被附加上CD1后缀
    #     smallCoverCheck(path, number, imagecut, json_data['cover_small'], c_word, option, filepath, config.failed_folder)  # 检查小封面
    #     imageDownload(option, json_data['cover'], number, c_word, path, multi_part, filepath, config.failed_folder)  # creatFoder会返回番号路径
    #     cutImage(option, imagecut, path, number, c_word)  # 裁剪图
    #     copyRenameJpgToBackdrop(option, path, number, c_word)
    #     PrintFiles(option, path, c_word, json_data['naming_rule'], part, cn_sub, json_data, filepath, config.failed_folder, tag)  # 打印文件 .nfo
    #     pasteFileToFolder(filepath, path, number, c_word)  # 移动文件
    #     # =======================================================================整理模式
    # elif config.program_mode == '2':
    #     pasteFileToFolder_mode2(filepath, path, multi_part, number, part, c_word)  # 移动文件

    # CEF(config.success_folder)
    # CEF(config.failed_folder)
    # Clean up the two excluded working directories (presumably CEF removes
    # empty folders -- TODO confirm against its definition).
    CEF(exclude_directory_1)
    CEF(exclude_directory_2)
    print("[+]All finished!!!")
    # NOTE(review): two consecutive exit prompts -- the first is likely a leftover duplicate.
    input("[+][+]Press enter key exit, you can check the error message before you exit.")
    input("[+][+]Press enter key exit, you can check the error messge before you exit.\n[+][+]按回车键结束,你可以在结束之前查看和错误信息。")
							
								
								
									
										28
									
								
								ConfigApp.py
									
									
									
									
									
								
							
							
						
						| @ -1,28 +0,0 @@ | ||||
| from configparser import ConfigParser | ||||
| 
 | ||||
| from MediaServer import MediaServer | ||||
| 
 | ||||
| 
 | ||||
class ConfigApp:
    """Typed view over the settings in config.ini used across the application."""

    def __init__(self):
        config_file = 'config.ini'
        config = ConfigParser()
        config.read(config_file, encoding='UTF-8')
        self.success_folder = config['common']['success_output_folder']
        self.failed_folder = config['common']['failed_output_folder']  # failed-output directory
        self.escape_folder = config['escape']['folders']  # folders excluded from multi-level scraping
        self.search_folder = config['common']['search_folder']  # search path
        self.temp_folder = config['common']['temp_folder']  # temporary resource path
        # BUG FIX: ConfigParser values are strings, so the original `== 1`
        # comparison was always False; compare against the string '1'.
        self.soft_link = (config['common']['soft_link'] == '1')
        # self.escape_literals = (config['escape']['literals'] == 1)
        self.naming_rule = config['Name_Rule']['naming_rule']
        self.location_rule = config['Name_Rule']['location_rule']

        self.proxy = config['proxy']['proxy']
        self.timeout = float(config['proxy']['timeout'])
        self.retry = int(config['proxy']['retry'])
        # Media-server enum member selected by name; drives artwork naming.
        self.media_server = MediaServer[config['media']['media_warehouse']]
        self.update_check = config['update']['update_check']
        self.debug_mode = config['debug_mode']['switch']
 | ||||
| @ -1,19 +0,0 @@ | ||||
import pandas as pd
import numpy as np

# Demo: pandas groupby on a small frame with two label columns and two
# random numeric columns.
_columns = {
    'A': ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
    'B': ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
    'C': np.random.randn(8),
    'D': np.random.randn(8),
}
df = pd.DataFrame(_columns)

print(df)
# Summary statistics per value of A.
groupedA = df.groupby('A').describe()
# Column C grouped by the (A, B) pair.
groupedAB = df.groupby(['A', 'B'])['C']
print('-' * 54)
for group_key, group_series in groupedAB:
    print('-' * 36)
    print(group_key)
    print('-' * 18)
    print(group_series)
| @ -1,38 +0,0 @@ | ||||
import pandas as pd
import numpy as np

"""
pandas walkthrough (one of Python's data-processing "three musketeers").
References:
https://pandas.pydata.org/pandas-docs/stable/user_guide
https://www.pypandas.cn/docs/getting_started/10min.html
"""

# A 6x4 frame of random floats indexed by six consecutive dates.
dates = pd.date_range('20130101', periods=6)
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))
print(dates)
print(df)

# A mixed-dtype frame built column by column.
df2 = pd.DataFrame({'A': 1.,
                    'B': pd.Timestamp('20130102'),
                    'C': pd.Series(1, index=list(range(4)), dtype='float32'),
                    'D': np.array([3] * 4, dtype='int32'),
                    'E': pd.Categorical(["test", "train", "test", "train"]),
                    'F': 'foo'})
print(df2)
print(df2.dtypes)
print(df.head())
print(df.tail(5))
print(df.index)
print(df.columns)

# The expressions below only compute; their results are discarded.
_ = df.describe()                           # summary statistics
_ = df.T                                    # transpose: swap index and columns
_ = df.sort_index(axis=1, ascending=False)  # sort by column labels
_ = df.sort_values(by='B')                  # sort rows by the values in column B

# Column selection
_ = df.A
_ = df['A']
# Row slicing
_ = df['20130102':'20130104']
_ = df[0:3]
 | ||||
| @ -1,28 +0,0 @@ | ||||
| from enum import Enum, auto | ||||
| 
 | ||||
| 
 | ||||
class MediaServer(Enum):
    """Supported media-library servers; each has its own artwork naming scheme."""

    EMBY = auto()
    PLEX = auto()
    KODI = auto()

    def poster_name(self, name):
        """Return the poster filename for *name* under this server's convention."""
        return {
            MediaServer.EMBY: name + '.png',         # save as [name].png
            MediaServer.KODI: name + '-poster.jpg',  # save as [name]-poster.jpg
            MediaServer.PLEX: 'poster.jpg',          # save as poster.jpg
        }.get(self)

    def image_name(self, name):
        """Return the fanart image filename for *name* under this server's convention."""
        return {
            MediaServer.EMBY: name + '.jpg',         # name.jpg
            MediaServer.KODI: name + '-fanart.jpg',  # [name]-fanart.jpg
            MediaServer.PLEX: 'fanart.jpg',          # fanart.jpg
        }.get(self)
| @ -1,3 +0,0 @@ | ||||
| from addict import Dict | ||||
| 
 | ||||
| # class Metadata: | ||||
| @ -1,115 +0,0 @@ | ||||
import re
from contextlib import suppress

import fuckit
| 
 | ||||
| 
 | ||||
class PathNameProcessor:
    """Static helpers that clean video file names and extract the code / episode."""

    # Video-container extension at the end of the name.
    # BUG FIX: the dot was unescaped, so '.' matched ANY character and e.g.
    # a name ending in 'Xmp4' (no dot) would also be stripped; escape it.
    pattern_of_file_name_suffixes = r'\.(mov|mp4|avi|rmvb|wmv|mov|mkv|flv|ts|m2ts)$'

    @staticmethod
    def remove_distractions(origin_name):
        """Remove noise (extension, studio tags, resolution and date markers)."""
        # Strip the file-type suffix.
        origin_name = re.sub(PathNameProcessor.pattern_of_file_name_suffixes, '', origin_name, 0, re.IGNORECASE)

        # Normalize separator characters (handles codes like '/-070409_621').
        origin_name = re.sub(r'[-_~*# ]', "-", origin_name, 0)

        # Studio / distribution tags.
        origin_name = re.sub(r'(Carib)(bean)?', '-', origin_name, 0, re.IGNORECASE)
        origin_name = re.sub(r'(1pondo)', '-', origin_name, 0, re.IGNORECASE)
        origin_name = re.sub(r'(tokyo)[-. ]?(hot)', '-', origin_name, 0, re.IGNORECASE)
        origin_name = re.sub(r'Uncensored', '-', origin_name, 0, re.IGNORECASE)
        origin_name = re.sub(r'JAV', '-', origin_name, 0, re.IGNORECASE)
        # Known tracker/site tag.
        origin_name = origin_name.replace('22-sht.me', '-')

        # Dates in the name: years 1970-2099, optional month and day.
        pattern_of_date = r'(?:-)(19[789]\d|20\d{2})(-?(0\d|1[012])-?(0[1-9]|[12]\d|3[01])?)?[-.]'
        # Resolution markers starting with a letter.
        pattern_of_resolution_alphas = r'(?<![a-zA-Z])(SD|((F|U)|(Full|Ultra)[-_*. ~]?)?HD|BD|(blu[-_*. ~]?ray)|[hx]264|[hx]265|HEVC)'
        # Resolution markers starting with a digit.
        pattern_of_resolution_numbers = r'(?<!\d)(4K|(1080[ip])|(720p)|(480p))'
        origin_name = re.sub(pattern_of_resolution_alphas, "-", origin_name, 0, re.IGNORECASE)
        origin_name = re.sub(pattern_of_resolution_numbers, "-", origin_name, 0, re.IGNORECASE)
        origin_name = re.sub(pattern_of_date, "-", origin_name)

        # BUG FIX: `if 'FC2' or 'fc2' in origin_name:` was always True because
        # the literal 'FC2' is truthy; only normalize PPV variants when FC2
        # actually appears (either case).
        if 'FC2' in origin_name.upper():
            origin_name = origin_name.replace('-PPV', '').replace('PPV-', '').replace('FC2PPV-', 'FC2-').replace(
                'FC2PPV_', 'FC2-')

        # Collapse runs of repeated '-' / '.'.
        origin_name = re.sub(r"([-.])(\1+)", r"\1", origin_name)
        # Drop trailing separators so the episode marker is easier to spot.
        origin_name = re.sub(r'[-.]+$', "", origin_name)

        return origin_name

    @staticmethod
    def extract_suffix_episode(origin_name):
        """
        Extract a trailing one-character episode marker, e.g. '123ABC1' (single
        digit), 'ipz.A', 'CD1', 'NOP019B.HD.wmv'.

        :return: (episode or None, name with the marker removed)
        """
        episode = None
        # suppress(Exception) replaces the former `with fuckit:` blocks: when a
        # pattern does not match, findall(...)[-1] raises IndexError and the
        # step is skipped, leaving `episode`/`origin_name` unchanged.
        with suppress(Exception):
            # Trailing single digit (not preceded by another digit).
            pattern_episodes_number = r'(?<!\d)\d$'
            episode = re.findall(pattern_episodes_number, origin_name)[-1]
            origin_name = re.sub(pattern_episodes_number, "", origin_name)
        with suppress(Exception):
            # Trailing single letter (not preceded by another letter).
            pattern_episodes_alpha = r'(?<![a-zA-Z])[a-zA-Z]$'
            episode = re.findall(pattern_episodes_alpha, origin_name)[-1]
            origin_name = re.sub(pattern_episodes_alpha, "", origin_name)
        return episode, origin_name

    @staticmethod
    def extract_code(origin_name):
        """
        Extract the episode marker and the normalized code from a cleaned name.

        :return: (episode or None, code or None)
        """
        name = None
        episode = None
        with suppress(Exception):
            # Code with or without '-': 1. digits+digits  2. letters+digits.
            name = re.findall(r'(?:\d{2,}-\d{2,})|(?:[A-Z]+-?[A-Z]*\d{2,})', origin_name)[-1]
            episode = PathNameProcessor.extract_episode_behind_code(origin_name, name)
            # Insert a '-' into codes that lack one.
            if not ('-' in name):
                # Non-greedy match after zero-width assertion; at least 2 digits,
                # e.g. ipz221.part2, mide072hhb, n1180.
                with suppress(Exception):
                    name = re.findall(r'[a-zA-Z]+\d{2,}', name)[-1]
                    # MCDV-47 and mcdv-047 are different titles, but SIVR-00008
                    # equals SIVR-008 -- except heyzo, whose numbers are 4 digits.
                    if "heyzo" not in name.lower():
                        name = re.sub(r'([a-zA-Z]{2,})(?:0*?)(\d{2,})', r'\1-\2', name)

            # Take the last '-'-containing candidate [letters-[letters]digits],
            # digits at least 2 long.
            with suppress(Exception):
                # MKBD_S03-MaRieS
                name = re.findall(r'[a-zA-Z|\d]+-[a-zA-Z|\d]*\d{2,}', name)[-1]
                # 107NTTR-037 -> NTTR-037, SIVR-00008 -> SIVR-008 (except heyzo).
                if "heyzo" not in name.lower():
                    searched = re.search(r'([a-zA-Z]{2,})-(?:0*)(\d{3,})', name)
                    if searched:
                        name = '-'.join(searched.groups())

        return episode, name

    @staticmethod
    def extract_episode_behind_code(origin_name, code):
        """Return the one-character episode marker right after *code*, or None."""
        episode = None

        with suppress(Exception):
            # Single letter or single digit immediately after the code,
            # optionally separated by '-'; None when re.search misses
            # (AttributeError on .groupdict() is suppressed).
            result_dict = re.search(rf'(?<={code})-?((?P<alpha>([A-Z](?![A-Z])))|(?P<num>\d(?!\d)))', origin_name,
                                    re.I).groupdict()
            episode = result_dict['alpha'] or result_dict['num']
        return episode
| 
 | ||||
| 
 | ||||
def safe_list_get(list_in, idx, default):
    """Return list_in[idx], falling back to *default* when idx is out of range."""
    result = default
    try:
        result = list_in[idx]
    except IndexError:
        pass
    return result
							
								
								
									
										19
									
								
								Pipfile
									
									
									
									
									
								
							
							
						
						| @ -1,19 +0,0 @@ | ||||
| [[source]] | ||||
| name = "pypi" | ||||
| url = "https://pypi.org/simple" | ||||
| verify_ssl = true | ||||
| 
 | ||||
| [dev-packages] | ||||
| 
 | ||||
| [packages] | ||||
| bs4 = "*" | ||||
| tenacity = "*" | ||||
| fuckit = "*" | ||||
| requests = "*" | ||||
| image = "*" | ||||
| lazyxml = {editable = true,git = "https://github.com/waynedyck/lazyxml.git",ref = "python-3-conversion_wd1"} | ||||
| lxml = "*" | ||||
| pyquery = "*" | ||||
| 
 | ||||
| [requires] | ||||
| python_version = "3.8" | ||||
							
								
								
									
										246
									
								
								Pipfile.lock
									
									
									
										generated
									
									
									
								
							
							
						
						| @ -1,246 +0,0 @@ | ||||
| { | ||||
|     "_meta": { | ||||
|         "hash": { | ||||
|             "sha256": "15bf3c6af3ec315358a0217481a13285f95fc742bb5db8a1f934e0d1c3d7d5e2" | ||||
|         }, | ||||
|         "pipfile-spec": 6, | ||||
|         "requires": { | ||||
|             "python_version": "3.8" | ||||
|         }, | ||||
|         "sources": [ | ||||
|             { | ||||
|                 "name": "pypi", | ||||
|                 "url": "https://pypi.org/simple", | ||||
|                 "verify_ssl": true | ||||
|             } | ||||
|         ] | ||||
|     }, | ||||
|     "default": { | ||||
|         "asgiref": { | ||||
|             "hashes": [ | ||||
|                 "sha256:5ee950735509d04eb673bd7f7120f8fa1c9e2df495394992c73234d526907e17", | ||||
|                 "sha256:7162a3cb30ab0609f1a4c95938fd73e8604f63bdba516a7f7d64b83ff09478f0" | ||||
|             ], | ||||
|             "markers": "python_version >= '3.5'", | ||||
|             "version": "==3.3.1" | ||||
|         }, | ||||
|         "beautifulsoup4": { | ||||
|             "hashes": [ | ||||
|                 "sha256:4c98143716ef1cb40bf7f39a8e3eec8f8b009509e74904ba3a7b315431577e35", | ||||
|                 "sha256:84729e322ad1d5b4d25f805bfa05b902dd96450f43842c4e99067d5e1369eb25", | ||||
|                 "sha256:fff47e031e34ec82bf17e00da8f592fe7de69aeea38be00523c04623c04fb666" | ||||
|             ], | ||||
|             "version": "==4.9.3" | ||||
|         }, | ||||
|         "bs4": { | ||||
|             "hashes": [ | ||||
|                 "sha256:36ecea1fd7cc5c0c6e4a1ff075df26d50da647b75376626cc186e2212886dd3a" | ||||
|             ], | ||||
|             "index": "pypi", | ||||
|             "version": "==0.0.1" | ||||
|         }, | ||||
|         "certifi": { | ||||
|             "hashes": [ | ||||
|                 "sha256:1a4995114262bffbc2413b159f2a1a480c969de6e6eb13ee966d470af86af59c", | ||||
|                 "sha256:719a74fb9e33b9bd44cc7f3a8d94bc35e4049deebe19ba7d8e108280cfd59830" | ||||
|             ], | ||||
|             "version": "==2020.12.5" | ||||
|         }, | ||||
|         "chardet": { | ||||
|             "hashes": [ | ||||
|                 "sha256:0d6f53a15db4120f2b08c94f11e7d93d2c911ee118b6b30a04ec3ee8310179fa", | ||||
|                 "sha256:f864054d66fd9118f2e67044ac8981a54775ec5b67aed0441892edb553d21da5" | ||||
|             ], | ||||
|             "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'", | ||||
|             "version": "==4.0.0" | ||||
|         }, | ||||
|         "cssselect": { | ||||
|             "hashes": [ | ||||
|                 "sha256:f612ee47b749c877ebae5bb77035d8f4202c6ad0f0fc1271b3c18ad6c4468ecf", | ||||
|                 "sha256:f95f8dedd925fd8f54edb3d2dfb44c190d9d18512377d3c1e2388d16126879bc" | ||||
|             ], | ||||
|             "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", | ||||
|             "version": "==1.1.0" | ||||
|         }, | ||||
|         "django": { | ||||
|             "hashes": [ | ||||
|                 "sha256:2d78425ba74c7a1a74b196058b261b9733a8570782f4e2828974777ccca7edf7", | ||||
|                 "sha256:efa2ab96b33b20c2182db93147a0c3cd7769d418926f9e9f140a60dca7c64ca9" | ||||
|             ], | ||||
|             "markers": "python_version >= '3.6'", | ||||
|             "version": "==3.1.5" | ||||
|         }, | ||||
|         "fuckit": { | ||||
|             "hashes": [ | ||||
|                 "sha256:059488e6aa2053da9db5eb5101e2498f608314da5118bf2385acb864568ccc25" | ||||
|             ], | ||||
|             "index": "pypi", | ||||
|             "version": "==4.8.1" | ||||
|         }, | ||||
|         "idna": { | ||||
|             "hashes": [ | ||||
|                 "sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6", | ||||
|                 "sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0" | ||||
|             ], | ||||
|             "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", | ||||
|             "version": "==2.10" | ||||
|         }, | ||||
|         "image": { | ||||
|             "hashes": [ | ||||
|                 "sha256:baa2e09178277daa50f22fd6d1d51ec78f19c12688921cb9ab5808743f097126" | ||||
|             ], | ||||
|             "index": "pypi", | ||||
|             "version": "==1.5.33" | ||||
|         }, | ||||
|         "lazyxml": { | ||||
|             "editable": true, | ||||
|             "git": "https://github.com/waynedyck/lazyxml.git", | ||||
|             "ref": "f42ea4a4febf4c1e120b05d6ca9cef42556a75d5" | ||||
|         }, | ||||
|         "lxml": { | ||||
|             "hashes": [ | ||||
|                 "sha256:0448576c148c129594d890265b1a83b9cd76fd1f0a6a04620753d9a6bcfd0a4d", | ||||
|                 "sha256:127f76864468d6630e1b453d3ffbbd04b024c674f55cf0a30dc2595137892d37", | ||||
|                 "sha256:1471cee35eba321827d7d53d104e7b8c593ea3ad376aa2df89533ce8e1b24a01", | ||||
|                 "sha256:2363c35637d2d9d6f26f60a208819e7eafc4305ce39dc1d5005eccc4593331c2", | ||||
|                 "sha256:2e5cc908fe43fe1aa299e58046ad66981131a66aea3129aac7770c37f590a644", | ||||
|                 "sha256:2e6fd1b8acd005bd71e6c94f30c055594bbd0aa02ef51a22bbfa961ab63b2d75", | ||||
|                 "sha256:366cb750140f221523fa062d641393092813b81e15d0e25d9f7c6025f910ee80", | ||||
|                 "sha256:42ebca24ba2a21065fb546f3e6bd0c58c3fe9ac298f3a320147029a4850f51a2", | ||||
|                 "sha256:4e751e77006da34643ab782e4a5cc21ea7b755551db202bc4d3a423b307db780", | ||||
|                 "sha256:4fb85c447e288df535b17ebdebf0ec1cf3a3f1a8eba7e79169f4f37af43c6b98", | ||||
|                 "sha256:50c348995b47b5a4e330362cf39fc503b4a43b14a91c34c83b955e1805c8e308", | ||||
|                 "sha256:535332fe9d00c3cd455bd3dd7d4bacab86e2d564bdf7606079160fa6251caacf", | ||||
|                 "sha256:535f067002b0fd1a4e5296a8f1bf88193080ff992a195e66964ef2a6cfec5388", | ||||
|                 "sha256:5be4a2e212bb6aa045e37f7d48e3e1e4b6fd259882ed5a00786f82e8c37ce77d", | ||||
|                 "sha256:60a20bfc3bd234d54d49c388950195d23a5583d4108e1a1d47c9eef8d8c042b3", | ||||
|                 "sha256:648914abafe67f11be7d93c1a546068f8eff3c5fa938e1f94509e4a5d682b2d8", | ||||
|                 "sha256:681d75e1a38a69f1e64ab82fe4b1ed3fd758717bed735fb9aeaa124143f051af", | ||||
|                 "sha256:68a5d77e440df94011214b7db907ec8f19e439507a70c958f750c18d88f995d2", | ||||
|                 "sha256:69a63f83e88138ab7642d8f61418cf3180a4d8cd13995df87725cb8b893e950e", | ||||
|                 "sha256:6e4183800f16f3679076dfa8abf2db3083919d7e30764a069fb66b2b9eff9939", | ||||
|                 "sha256:6fd8d5903c2e53f49e99359b063df27fdf7acb89a52b6a12494208bf61345a03", | ||||
|                 "sha256:791394449e98243839fa822a637177dd42a95f4883ad3dec2a0ce6ac99fb0a9d", | ||||
|                 "sha256:7a7669ff50f41225ca5d6ee0a1ec8413f3a0d8aa2b109f86d540887b7ec0d72a", | ||||
|                 "sha256:7e9eac1e526386df7c70ef253b792a0a12dd86d833b1d329e038c7a235dfceb5", | ||||
|                 "sha256:7ee8af0b9f7de635c61cdd5b8534b76c52cd03536f29f51151b377f76e214a1a", | ||||
|                 "sha256:8246f30ca34dc712ab07e51dc34fea883c00b7ccb0e614651e49da2c49a30711", | ||||
|                 "sha256:8c88b599e226994ad4db29d93bc149aa1aff3dc3a4355dd5757569ba78632bdf", | ||||
|                 "sha256:923963e989ffbceaa210ac37afc9b906acebe945d2723e9679b643513837b089", | ||||
|                 "sha256:94d55bd03d8671686e3f012577d9caa5421a07286dd351dfef64791cf7c6c505", | ||||
|                 "sha256:97db258793d193c7b62d4e2586c6ed98d51086e93f9a3af2b2034af01450a74b", | ||||
|                 "sha256:a9d6bc8642e2c67db33f1247a77c53476f3a166e09067c0474facb045756087f", | ||||
|                 "sha256:cd11c7e8d21af997ee8079037fff88f16fda188a9776eb4b81c7e4c9c0a7d7fc", | ||||
|                 "sha256:d8d3d4713f0c28bdc6c806a278d998546e8efc3498949e3ace6e117462ac0a5e", | ||||
|                 "sha256:e0bfe9bb028974a481410432dbe1b182e8191d5d40382e5b8ff39cdd2e5c5931", | ||||
|                 "sha256:f4822c0660c3754f1a41a655e37cb4dbbc9be3d35b125a37fab6f82d47674ebc", | ||||
|                 "sha256:f83d281bb2a6217cd806f4cf0ddded436790e66f393e124dfe9731f6b3fb9afe", | ||||
|                 "sha256:fc37870d6716b137e80d19241d0e2cff7a7643b925dfa49b4c8ebd1295eb506e" | ||||
|             ], | ||||
|             "index": "pypi", | ||||
|             "version": "==4.6.2" | ||||
|         }, | ||||
|         "pillow": { | ||||
|             "hashes": [ | ||||
|                 "sha256:165c88bc9d8dba670110c689e3cc5c71dbe4bfb984ffa7cbebf1fac9554071d6", | ||||
|                 "sha256:1d208e670abfeb41b6143537a681299ef86e92d2a3dac299d3cd6830d5c7bded", | ||||
|                 "sha256:22d070ca2e60c99929ef274cfced04294d2368193e935c5d6febfd8b601bf865", | ||||
|                 "sha256:2353834b2c49b95e1313fb34edf18fca4d57446675d05298bb694bca4b194174", | ||||
|                 "sha256:39725acf2d2e9c17356e6835dccebe7a697db55f25a09207e38b835d5e1bc032", | ||||
|                 "sha256:3de6b2ee4f78c6b3d89d184ade5d8fa68af0848f9b6b6da2b9ab7943ec46971a", | ||||
|                 "sha256:47c0d93ee9c8b181f353dbead6530b26980fe4f5485aa18be8f1fd3c3cbc685e", | ||||
|                 "sha256:5e2fe3bb2363b862671eba632537cd3a823847db4d98be95690b7e382f3d6378", | ||||
|                 "sha256:604815c55fd92e735f9738f65dabf4edc3e79f88541c221d292faec1904a4b17", | ||||
|                 "sha256:6c5275bd82711cd3dcd0af8ce0bb99113ae8911fc2952805f1d012de7d600a4c", | ||||
|                 "sha256:731ca5aabe9085160cf68b2dbef95fc1991015bc0a3a6ea46a371ab88f3d0913", | ||||
|                 "sha256:7612520e5e1a371d77e1d1ca3a3ee6227eef00d0a9cddb4ef7ecb0b7396eddf7", | ||||
|                 "sha256:7916cbc94f1c6b1301ac04510d0881b9e9feb20ae34094d3615a8a7c3db0dcc0", | ||||
|                 "sha256:81c3fa9a75d9f1afafdb916d5995633f319db09bd773cb56b8e39f1e98d90820", | ||||
|                 "sha256:887668e792b7edbfb1d3c9d8b5d8c859269a0f0eba4dda562adb95500f60dbba", | ||||
|                 "sha256:93a473b53cc6e0b3ce6bf51b1b95b7b1e7e6084be3a07e40f79b42e83503fbf2", | ||||
|                 "sha256:96d4dc103d1a0fa6d47c6c55a47de5f5dafd5ef0114fa10c85a1fd8e0216284b", | ||||
|                 "sha256:a3d3e086474ef12ef13d42e5f9b7bbf09d39cf6bd4940f982263d6954b13f6a9", | ||||
|                 "sha256:b02a0b9f332086657852b1f7cb380f6a42403a6d9c42a4c34a561aa4530d5234", | ||||
|                 "sha256:b09e10ec453de97f9a23a5aa5e30b334195e8d2ddd1ce76cc32e52ba63c8b31d", | ||||
|                 "sha256:b6f00ad5ebe846cc91763b1d0c6d30a8042e02b2316e27b05de04fa6ec831ec5", | ||||
|                 "sha256:bba80df38cfc17f490ec651c73bb37cd896bc2400cfba27d078c2135223c1206", | ||||
|                 "sha256:c3d911614b008e8a576b8e5303e3db29224b455d3d66d1b2848ba6ca83f9ece9", | ||||
|                 "sha256:ca20739e303254287138234485579b28cb0d524401f83d5129b5ff9d606cb0a8", | ||||
|                 "sha256:cb192176b477d49b0a327b2a5a4979552b7a58cd42037034316b8018ac3ebb59", | ||||
|                 "sha256:cdbbe7dff4a677fb555a54f9bc0450f2a21a93c5ba2b44e09e54fcb72d2bd13d", | ||||
|                 "sha256:cf6e33d92b1526190a1de904df21663c46a456758c0424e4f947ae9aa6088bf7", | ||||
|                 "sha256:d355502dce85ade85a2511b40b4c61a128902f246504f7de29bbeec1ae27933a", | ||||
|                 "sha256:d673c4990acd016229a5c1c4ee8a9e6d8f481b27ade5fc3d95938697fa443ce0", | ||||
|                 "sha256:dc577f4cfdda354db3ae37a572428a90ffdbe4e51eda7849bf442fb803f09c9b", | ||||
|                 "sha256:dd9eef866c70d2cbbea1ae58134eaffda0d4bfea403025f4db6859724b18ab3d", | ||||
|                 "sha256:f50e7a98b0453f39000619d845be8b06e611e56ee6e8186f7f60c3b1e2f0feae" | ||||
|             ], | ||||
|             "markers": "python_version >= '3.6'", | ||||
|             "version": "==8.1.0" | ||||
|         }, | ||||
|         "pyquery": { | ||||
|             "hashes": [ | ||||
|                 "sha256:1fc33b7699455ed25c75282bc8f80ace1ac078b0dda5a933dacbd8b1c1f83963", | ||||
|                 "sha256:a388eefb6bc4a55350de0316fbd97cda999ae669b6743ae5b99102ba54f5aa72" | ||||
|             ], | ||||
|             "index": "pypi", | ||||
|             "version": "==1.4.3" | ||||
|         }, | ||||
|         "pytz": { | ||||
|             "hashes": [ | ||||
|                 "sha256:16962c5fb8db4a8f63a26646d8886e9d769b6c511543557bc84e9569fb9a9cb4", | ||||
|                 "sha256:180befebb1927b16f6b57101720075a984c019ac16b1b7575673bea42c6c3da5" | ||||
|             ], | ||||
|             "version": "==2020.5" | ||||
|         }, | ||||
|         "requests": { | ||||
|             "hashes": [ | ||||
|                 "sha256:27973dd4a904a4f13b263a19c866c13b92a39ed1c964655f025f3f8d3d75b804", | ||||
|                 "sha256:c210084e36a42ae6b9219e00e48287def368a26d03a048ddad7bfee44f75871e" | ||||
|             ], | ||||
|             "index": "pypi", | ||||
|             "version": "==2.25.1" | ||||
|         }, | ||||
|         "six": { | ||||
|             "hashes": [ | ||||
|                 "sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259", | ||||
|                 "sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced" | ||||
|             ], | ||||
|             "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", | ||||
|             "version": "==1.15.0" | ||||
|         }, | ||||
|         "soupsieve": { | ||||
|             "hashes": [ | ||||
|                 "sha256:4bb21a6ee4707bf43b61230e80740e71bfe56e55d1f1f50924b087bb2975c851", | ||||
|                 "sha256:6dc52924dc0bc710a5d16794e6b3480b2c7c08b07729505feab2b2c16661ff6e" | ||||
|             ], | ||||
|             "markers": "python_version >= '3.0'", | ||||
|             "version": "==2.1" | ||||
|         }, | ||||
|         "sqlparse": { | ||||
|             "hashes": [ | ||||
|                 "sha256:017cde379adbd6a1f15a61873f43e8274179378e95ef3fede90b5aa64d304ed0", | ||||
|                 "sha256:0f91fd2e829c44362cbcfab3e9ae12e22badaa8a29ad5ff599f9ec109f0454e8" | ||||
|             ], | ||||
|             "markers": "python_version >= '3.5'", | ||||
|             "version": "==0.4.1" | ||||
|         }, | ||||
|         "tenacity": { | ||||
|             "hashes": [ | ||||
|                 "sha256:baed357d9f35ec64264d8a4bbf004c35058fad8795c5b0d8a7dc77ecdcbb8f39", | ||||
|                 "sha256:e14d191fb0a309b563904bbc336582efe2037de437e543b38da749769b544d7f" | ||||
|             ], | ||||
|             "index": "pypi", | ||||
|             "version": "==6.3.1" | ||||
|         }, | ||||
|         "urllib3": { | ||||
|             "hashes": [ | ||||
|                 "sha256:19188f96923873c92ccb987120ec4acaa12f0461fa9ce5d3d0772bc965a39e08", | ||||
|                 "sha256:d8ff90d979214d7b4f8ce956e80f4028fc6860e4431f731ea4a8c08f23f99473" | ||||
|             ], | ||||
|             "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'", | ||||
|             "version": "==1.26.2" | ||||
|         } | ||||
|     }, | ||||
|     "develop": {} | ||||
| } | ||||
							
								
								
									
										389
									
								
								README.md
									
									
									
									
									
										
										
										Executable file → Normal file
									
								
							
							
						
						| @ -1,263 +1,223 @@ | ||||
| # AV Data Capture (CLI) | ||||
| # AV Data Capture | ||||
| 
 | ||||
| 
 | ||||
| CLI 版本   | ||||
| <a title="Hits" target="_blank" href="https://github.com/yoshiko2/AV_Data_Capture"><img src="https://hits.b3log.org/yoshiko2/AV_Data_Capture.svg"></a> | ||||
|  | ||||
|  | ||||
| <br> | ||||
|  | ||||
|  | ||||
| <br> | ||||
| [GUI 版本](https://github.com/moyy996/AVDC)   | ||||
| <a title="Hits" target="_blank" href="https://github.com/moyy996/avdc"><img src="https://hits.b3log.org/moyy996/AVDC.svg"></a> | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
| <br> | ||||
| 
 | ||||
| 
 | ||||
| **日本电影元数据 抓取工具 | 刮削器**,配合本地影片管理软件 Emby, Jellyfin, Kodi 等管理本地影片,该软件起到分类与元数据(metadata)抓取作用,利用元数据信息来分类,供本地影片分类整理使用。   | ||||
| ##### 本地电影刮削与整理一体化解决方案 | ||||
| **日本电影元数据 抓取工具 | 刮削器**,配合本地影片管理软件EMBY,KODI等管理本地影片,该软件起到分类与元数据抓取作用,利用元数据信息来分类,供本地影片分类整理使用。 | ||||
| 
 | ||||
| # 目录 | ||||
| * [声明](#声明) | ||||
| * [FAQ](#FAQ) | ||||
| * [你问我答 FAQ](#你问我答-faq) | ||||
| * [故事](#故事) | ||||
| * [效果图](#效果图) | ||||
| * [如何使用](#如何使用) | ||||
|     * [下载](#下载) | ||||
|     * [简要教程](#简要教程) | ||||
| * [完整文档](#完整文档) | ||||
|     * [模块安装](#模块安装) | ||||
|     * [配置](#配置configini) | ||||
|     * [多目录影片处理](#多目录影片处理) | ||||
|     * [多集影片处理](#多集影片处理) | ||||
|     * [中文字幕处理](#中文字幕处理) | ||||
|     * [异常处理(重要)](#异常处理重要) | ||||
| * [写在后面](#写在后面) | ||||
| * [下载](#下载) | ||||
| * [简明教程](#简要教程) | ||||
| * [模块安装](#1请安装模块在cmd终端逐条输入以下命令安装) | ||||
| * [配置](#2配置configini) | ||||
| * [(可选)设置自定义目录和影片重命名规则](#3可选设置自定义目录和影片重命名规则) | ||||
| * [运行软件](#5运行-av_data_capturepyexe) | ||||
| * [影片原路径处理](#4建议把软件拷贝和电影的统一目录下) | ||||
| * [异常处理(重要)](#51异常处理重要) | ||||
| * [导入至媒体库](#7把jav_output文件夹导入到embykodi中等待元数据刷新完成) | ||||
| * [关于群晖NAS](#8关于群晖NAS) | ||||
| * [写在后面](#9写在后面) | ||||
| 
 | ||||
| # 声明 | ||||
| * 本软件仅供**技术交流,学术交流**使用 | ||||
| * 本软件作者编写出该软件旨在学习 Python ,提高编程水平 | ||||
| * 用户在使用本软件前,请用户自觉遵守当地法律法规,如果本软件使用过程中存在违反当地法律法规的行为,请勿使用该软件 | ||||
| * 用户在使用本软件时,若产生一切违法行为由用户承担 | ||||
| * 严禁用户将本软件使用于商业和个人其他意图 | ||||
| * 本软件作者保留最终决定权和最终解释权 | ||||
| * 本软件仅供**技术交流,学术交流**使用<br> | ||||
| * 本软件作者编写出该软件旨在学习Python3,提高编程水平<br> | ||||
| * 用户在使用该软件前,请用户自觉遵守当地法律法规,如果该软件使用过程中存在违反当地法律法规的行为,请勿使用该软件<br> | ||||
| * 用户使用该软件时,若产生一切违法行为由用户承担<br> | ||||
| * 严禁用户使用于商业和个人其他意图<br> | ||||
| * 本软件作者保留最终决定权和最终解释权<br> | ||||
| 
 | ||||
| **若用户不同意上述条款任意一条,请勿使用本软件** | ||||
| **若用户不同意上述条款任意一条,请勿使用该软件**<br> | ||||
| 
 | ||||
| # FAQ | ||||
| ### 软件能下片吗? | ||||
| * 本软件不提供任何影片下载地址,仅供本地影片分类整理使用 | ||||
| ### 什么是元数据(metadata)? | ||||
| * 元数据包括了影片的封面,导演,演员,简介,类型...... | ||||
| 
 | ||||
| # 你问我答 FAQ | ||||
| ### 这软件能下片吗? | ||||
| * 该软件不提供任何影片下载地址,仅供本地影片分类整理使用。 | ||||
| ### 什么是元数据? | ||||
| * 元数据包括了影片的:封面,导演,演员,简介,类型...... | ||||
| ### 软件收费吗? | ||||
| * 本软件永久免费,**除了作者<ruby>钦<rt>yìng</rt></ruby>点以外** | ||||
| * 软件永久免费。**除了作者钦点以外** | ||||
| ### 软件运行异常怎么办? | ||||
| * 认真看 [异常处理(重要)](#异常处理重要) | ||||
| ### 为什么软件要单线程运行? | ||||
| * 多线程爬取可能会触发网站反爬机制,同时也违背了些道德,故单线程运行 | ||||
| * 认真看 [异常处理(重要)](#5异常处理重要) | ||||
| 
 | ||||
| # 故事 | ||||
| [点击跳转至作者博客文章](https://yoshiko2.github.io/2019/10/18/AVDC/) | ||||
| 
 | ||||
| # 效果图 | ||||
| **图片来自网络**,图片仅供参考,具体效果请自行联想 | ||||
|  | ||||
|  | ||||
| **图片来自网络**,由于相关法律法规,具体效果请自行联想 | ||||
|  | ||||
| <br> | ||||
| 
 | ||||
| # 如何使用 | ||||
| ## 下载 | ||||
| * release的程序可脱离**python环境**运行,可跳过 [模块安装](#模块安装) | ||||
| ### Windows | ||||
| Release 下载地址(**仅限Windows**): | ||||
| ### 下载 | ||||
| * release的程序可脱离**python环境**运行,可跳过 [模块安装](#1请安装模块在cmd终端逐条输入以下命令安装)<br>Release 下载地址(**仅限Windows**):<br>[](https://github.com/yoshiko2/AV_Data_Capture/releases)<br> | ||||
| * Linux,MacOS请下载源码包运行 | ||||
| 
 | ||||
| [](https://github.com/yoshiko2/AV_Data_Capture/releases) | ||||
| 
 | ||||
| * 若 Windows 用户需要运行源代码版本,请安装 Windows Python 环境:[点击前往](https://www.python.org/downloads/windows/) 选中 executable installer 下载 | ||||
| 
 | ||||
| ### MacOS, Linux | ||||
| * MacOS, Linux 用户请下载源码包运行 | ||||
| * MacOS Python环境:开箱即用,[可选安装最新版本](https://docs.brew.sh/Homebrew-and-Python) | ||||
| * Linux Python环境:开箱即用,可选安装最新版本,恕 Linux 版本众多请自行搜索 | ||||
| 
 | ||||
| ## 简要教程: | ||||
| 1. 把软件拉到和电影的同一目录 | ||||
| 2. 设置 config.ini 文件的代理(路由器拥有自动代理功能的可以把 proxy= 后面内容去掉) | ||||
| 3. 运行软件等待完成 | ||||
| 4. 把 JAV_output 导入至 Kodi, Emby, Jellyfin 中。 | ||||
| 
 | ||||
| 详细请看以下完整文档 | ||||
| 
 | ||||
| # 完整文档 | ||||
| 
 | ||||
| ## 模块安装 | ||||
| 如果运行**源码**版,运行前请安装**Python环境**和安装以下**模块**   | ||||
| 
 | ||||
| 在终端 cmd/Powershell/Terminal 中输入以下代码来安装模块 | ||||
| * Windows Python环境:[点击前往](https://www.python.org/downloads/windows/) 选中executable installer下载 | ||||
| * MacOS Python环境:[点击前往](https://www.python.org/downloads/mac-osx/) | ||||
| * Linux Python环境:Linux用户懂的吧,不解释下载地址 | ||||
| ### 简要教程:<br> | ||||
| **1.把软件拉到和电影的同一目录<br>2.设置ini文件的代理(路由器拥有自动代理功能的可以把proxy=后面内容去掉)<br>3.运行软件等待完成<br>4.把JAV_output导入至KODI,EMBY中。<br>详细请看以下教程**<br> | ||||
| 
 | ||||
| ## 1.请安装模块,在CMD/终端逐条输入以下命令安装 | ||||
| ```python | ||||
| pip install requests pyquery lxml Beautifulsoup4 pillow | ||||
| pip install requests | ||||
| ``` | ||||
| ###  | ||||
| ```python | ||||
| pip install pyquery | ||||
| ``` | ||||
| ### | ||||
| ```python | ||||
| pip install lxml | ||||
| ``` | ||||
| ### | ||||
| ```python | ||||
| pip install Beautifulsoup4 | ||||
| ``` | ||||
| ### | ||||
| ```python | ||||
| pip install pillow | ||||
| ``` | ||||
| ### | ||||
| 
 | ||||
| ## 配置config.ini | ||||
| ### 运行模式 | ||||
| ``` | ||||
| [common] | ||||
| main_mode=1 | ||||
| ``` | ||||
| 1为普通模式,   | ||||
| ## 2.配置config.ini | ||||
| >[common]<br> | ||||
| >main_mode=1<br> | ||||
| >failed_output_folder=failed<br> | ||||
| >success_output_folder=JAV_output<br> | ||||
| > | ||||
| >[proxy]<br> | ||||
| >proxy=127.0.0.1:1080<br> | ||||
| >timeout=10<br> | ||||
| >retry=3<br> | ||||
| > | ||||
| >[Name_Rule]<br> | ||||
| >location_rule=actor+'/'+number<br> | ||||
| >naming_rule=number+'-'+title<br> | ||||
| > | ||||
| >[update]<br> | ||||
| >update_check=1<br> | ||||
| > | ||||
| >[media]<br> | ||||
| >media_warehouse=emby<br> | ||||
| >#emby or plex<br> | ||||
| > | ||||
| >[directory_capture]<br> | ||||
| >directory=<br> | ||||
| 
 | ||||
| --- | ||||
| #### 运行模式 | ||||
| >[common]<br> | ||||
| >main_mode=1<br> | ||||
| 
 | ||||
| 1为普通模式<br> | ||||
| 2为整理模式:仅根据女优把电影命名为番号并分类到女优名称的文件夹下 | ||||
| 
 | ||||
| ``` | ||||
| success_output_folder=JAV_outputd | ||||
| failed_output_folder=failed | ||||
| ``` | ||||
| >failed_output_folder=failed<br> | ||||
| >success_output_folder=JAV_outputd<br> | ||||
| 
 | ||||
| 设置成功输出目录和失败输出目录 | ||||
| 
 | ||||
| --- | ||||
| #### 软链接 | ||||
| 方便PT下载完既想刮削又想继续上传的仓鼠党同志 | ||||
| ``` | ||||
| [common] | ||||
| soft_link=0 | ||||
| ``` | ||||
| 1为开启软链接模式   | ||||
| 0为关闭 | ||||
| 
 | ||||
| --- | ||||
| ### 网络设置 | ||||
| ``` | ||||
| [proxy]   | ||||
| proxy=127.0.0.1:1081   | ||||
| timeout=10   | ||||
| retry=3 | ||||
| ```   | ||||
| #### 针对某些地区的代理设置 | ||||
| ``` | ||||
| proxy=127.0.0.1:1081   | ||||
| ``` | ||||
| 
 | ||||
| 打开```config.ini```,在```[proxy]```下的```proxy```行设置本地代理地址和端口,支持Shadowxxxx/X,V2XXX本地代理端口   | ||||
| 素人系列抓取建议使用日本代理   | ||||
| **路由器拥有自动代理功能的可以把proxy=后面内容去掉**   | ||||
| **本地代理软件开全局模式的用户同上**   | ||||
| **如果遇到timeout错误,可以把文件的proxy=后面的地址和端口删除,并开启代理软件全局模式,或者重启电脑,代理软件,网卡**   | ||||
| 
 | ||||
| --- | ||||
| #### * 针对“某些地区”的代理设置 | ||||
| 打开```config.ini```,在```[proxy]```下的```proxy```行设置本地代理地址和端口,支持Shadowxxxx/X,V2XXX本地代理端口:<br> | ||||
| 例子:```proxy=127.0.0.1:1080```<br>素人系列抓取建议使用日本代理<br> | ||||
| **路由器拥有自动代理功能的可以把proxy=后面内容去掉**<br> | ||||
| **本地代理软件开全局模式的同志同上**<br> | ||||
| **如果遇到timeout错误,可以把文件的proxy=后面的地址和端口删除,并开启vpn全局模式,或者重启电脑,vpn,网卡**<br> | ||||
| #### 连接超时重试设置 | ||||
| ``` | ||||
| timeout=10   | ||||
| ``` | ||||
| >[proxy]<br> | ||||
| >timeout=10<br> | ||||
| 
 | ||||
| 10为超时重试时间 单位:秒 | ||||
| 
 | ||||
| --- | ||||
| #### 连接重试次数设置 | ||||
| ``` | ||||
| retry=3   | ||||
| ``` | ||||
| >[proxy]<br> | ||||
| >retry=3<br> | ||||
| 
 | ||||
| 3即为重试次数 | ||||
| 
 | ||||
| --- | ||||
| #### 检查更新开关 | ||||
| ``` | ||||
| [update]   | ||||
| update_check=1   | ||||
| ``` | ||||
| >[update]<br> | ||||
| >update_check=1<br> | ||||
| 
 | ||||
| 0为关闭,1为开启,不建议关闭 | ||||
| 
 | ||||
| --- | ||||
| ### 媒体库选择  | ||||
| ``` | ||||
| [media] | ||||
| media_warehouse=emby | ||||
| #emby plex kodi | ||||
| ``` | ||||
| 可选择emby, plex, kodi | ||||
| ##### 媒体库选择  | ||||
| >[media]<br> | ||||
| >media_warehouse=emby<br> | ||||
| >#emby or plex<br> | ||||
| 
 | ||||
| 可选择emby, plex<br> | ||||
| 如果是PLEX,请安装插件:```XBMCnfoMoviesImporter``` | ||||
| 
 | ||||
| --- | ||||
| ### 排除指定字符和目录 | ||||
| ``` | ||||
| [escape]   | ||||
| literals=\   | ||||
| folders=failed,JAV_output | ||||
| ``` | ||||
| #### 抓取目录选择 | ||||
| >[directory_capture]<br> | ||||
| >directory=<br> | ||||
| 如果directory后面为空,则抓取和程序同一目录下的影片,设置为``` * ```可抓取软件所在目录下的所有子目录中的影片<br>如果出错请不要加* | ||||
| 
 | ||||
| ```literals=``` 标题指定字符删除,例如```iterals=\()```,则删除标题中```\()```字符   | ||||
| ```folders=``` 指定目录,例如```folders=failed,JAV_output```,多目录刮削时跳过failed,JAV_output   | ||||
| 
 | ||||
| --- | ||||
| ### 调试模式 | ||||
| ``` | ||||
| [debug_mode] | ||||
| switch=1   | ||||
| ``` | ||||
| #### 调试模式 | ||||
| >[debug_mode]<br>switch=1<br> | ||||
| 
 | ||||
| 如要开启调试模式,请手动输入以上代码到```config.ini```中,开启后可在抓取中显示影片元数据 | ||||
| 
 | ||||
| --- | ||||
| ### (可选)设置自定义目录和影片重命名规则 | ||||
| ``` | ||||
| [Name_Rule] | ||||
| location_rule=actor+'/'+number | ||||
| naming_rule=number+'-'+title | ||||
| ``` | ||||
| 
 | ||||
| ### 3.(可选)设置自定义目录和影片重命名规则 | ||||
| >[Name_Rule]<br> | ||||
| >location_rule=actor+'/'+number<br> | ||||
| >naming_rule=number+'-'+title<br> | ||||
| 
 | ||||
| 已有默认配置 | ||||
| 
 | ||||
| --- | ||||
| #### 命名参数 | ||||
| ``` | ||||
| title = 片名 | ||||
| actor = 演员 | ||||
| studio = 公司 | ||||
| director = 导演 | ||||
| release = 发售日 | ||||
| year = 发行年份 | ||||
| number = 番号 | ||||
| cover = 封面链接 | ||||
| tag = 类型 | ||||
| outline = 简介 | ||||
| runtime = 时长 | ||||
| ``` | ||||
| >title = 片名<br> | ||||
| >actor = 演员<br> | ||||
| >studio = 公司<br> | ||||
| >director = 导演<br> | ||||
| >release = 发售日<br> | ||||
| >year = 发行年份<br> | ||||
| >number = 番号<br> | ||||
| >cover = 封面链接<br> | ||||
| >tag = 类型<br> | ||||
| >outline = 简介<br> | ||||
| >runtime = 时长<br> | ||||
| 
 | ||||
| 上面的参数以下都称之为**变量** | ||||
| 
 | ||||
| #### 例子: | ||||
| 自定义规则方法:有两种元素,变量和字符,无论是任何一种元素之间连接必须要用加号 **+** ,比如:```'naming_rule=['+number+']-'+title```,其中冒号 ' ' 内的文字是字符,没有冒号包含的文字是变量,元素之间连接必须要用加号 **+**  | ||||
| 
 | ||||
| 目录结构规则:默认 ```location_rule=actor+'/'+number``` | ||||
| 
 | ||||
| **不推荐修改时在这里添加 title**,有时 title 过长,因为 Windows API 问题,抓取数据时新建文件夹容易出错。 | ||||
| 
 | ||||
| 影片命名规则:默认 ```naming_rule=number+'-'+title``` | ||||
| 
 | ||||
| **在 Emby, Kodi等本地媒体库显示的标题,不影响目录结构下影片文件的命名**,依旧是 番号+后缀。 | ||||
| 
 | ||||
| --- | ||||
| 自定义规则方法:有两种元素,变量和字符,无论是任何一种元素之间连接必须要用加号 **+** ,比如:```'naming_rule=['+number+']-'+title```,其中冒号 ' ' 内的文字是字符,没有冒号包含的文字是变量,元素之间连接必须要用加号 **+** <br> | ||||
| 目录结构规则:默认 ```location_rule=actor+'/'+number```<br> **不推荐修改时在这里添加title**,有时title过长,因为Windows API问题,抓取数据时新建文件夹容易出错。<br> | ||||
| 影片命名规则:默认 ```naming_rule=number+'-'+title```<br> **在EMBY,KODI等本地媒体库显示的标题,不影响目录结构下影片文件的命名**,依旧是 番号+后缀。 | ||||
| 
 | ||||
| ### 更新开关 | ||||
| ``` | ||||
| [update] | ||||
| update_check=1 | ||||
| ``` | ||||
| 
 | ||||
| >[update]<br>update_check=1<br> | ||||
| 1为开,0为关 | ||||
| 
 | ||||
| ## 多目录影片处理 | ||||
| 可以在多个有影片目录的父目录下搜索影片后缀,然后剪切到和程序同一目录下   | ||||
| 
 | ||||
| ## 多集影片处理 | ||||
| **建议使用视频合并合并为一个视频文件** | ||||
| 可以把多集电影按照集数后缀命名为类似```ssni-xxx-cd1.mp4m,ssni-xxx-cd2.mp4,abp-xxx-CD1.mp4```的规则,只要含有```-CDn./-cdn.```类似命名规则,即可使用分集功能 | ||||
| 
 | ||||
| ## 中文字幕处理 | ||||
| 
 | ||||
| 运行 ```AV_Data_capture.py/.exe``` | ||||
| 
 | ||||
| 当文件名包含: | ||||
| ## 4.建议把软件拷贝和电影的统一目录下 | ||||
| 如果```config.ini```中```directory=```后面为空的情况下 | ||||
| ## 5.运行 ```AV_Data_capture.py/.exe``` | ||||
| 当文件名包含:<br> | ||||
| 中文,字幕,-c., -C., 处理元数据时会加上**中文字幕**标签 | ||||
| 
 | ||||
| ## 异常处理(重要) | ||||
| 
 | ||||
| ## 5.1 异常处理(重要) | ||||
| ### 请确保软件是完整地!确保ini文件内容是和下载提供ini文件内容的一致的! | ||||
| --- | ||||
| ### 关于软件打开就闪退 | ||||
| @ -267,60 +227,47 @@ update_check=1 | ||||
| ### 关于 ```Updata_check``` 和 ```JSON``` 相关的错误 | ||||
| 跳转 [网络设置](#网络设置) | ||||
| 
 | ||||
| --- | ||||
| ### 关于字幕文件移动功能 | ||||
| 字幕文件前缀必须与影片文件前缀一致,才可以使用该功能 | ||||
| 
 | ||||
| --- | ||||
| ### 关于```FileNotFoundError: [WinError 3] 系统找不到指定的路径。: 'JAV_output''```  | ||||
| 在软件所在文件夹下新建 JAV_output 文件夹,可能是你没有把软件拉到和电影的同一目录 | ||||
| 
 | ||||
| --- | ||||
| ### 关于连接拒绝的错误 | ||||
| 请设置好[代理](#针对某些地区的代理设置) | ||||
| 请设置好[代理](#针对某些地区的代理设置)<br> | ||||
| 
 | ||||
| --- | ||||
| ### 关于Nonetype,xpath报错 | ||||
| 同上 | ||||
| 同上<br> | ||||
| 
 | ||||
| --- | ||||
| ### 关于番号提取失败或者异常 | ||||
| **目前可以提取元素的影片:JAVBUS上有元数据的电影,素人系列:300Maan,259luxu,siro等,FC2系列** | ||||
| 
 | ||||
| >下一张图片来自 Pockies 的 blog 原作者已授权 | ||||
| **目前可以提取元素的影片:JAVBUS上有元数据的电影,素人系列:300Maan,259luxu,siro等,FC2系列**<br> | ||||
| >下一张图片来自Pockies的blog 原作者已授权<br> | ||||
| 
 | ||||
|  | ||||
| 
 | ||||
| 目前作者已经完善了番号提取机制,功能较为强大,可提取上述文件名的番号,如果出现提取失败或者异常的情况,请用以下规则命名 | ||||
| 
 | ||||
| 
 | ||||
| 目前作者已经完善了番号提取机制,功能较为强大,可提取上述文件名的番号,如果出现提取失败或者异常的情况,请用以下规则命名<br> | ||||
| **妈蛋不要喂软件那么多野鸡片子,不让软件好好活了,操** | ||||
| ``` | ||||
| COSQ-004.mp4 | ||||
| ``` | ||||
| 
 | ||||
| 针对 **野鸡番号** ,你需要把文件名命名为与抓取网站提供的番号一致(文件拓展名除外),然后把文件拖拽至core.exe/.py<br> | ||||
| **野鸡番号**:比如 ```XXX-XXX-1```,  ```1301XX-MINA_YUKA``` 这种**野鸡**番号,在javbus等资料库存在的作品。<br>**重要**:除了 **影片文件名**  ```XXXX-XXX-C```,后面这种-C的是指电影有中文字幕!<br> | ||||
| 条件:文件名中间要有下划线或者减号"_","-",没有多余的内容只有番号为最佳,可以让软件更好获取元数据 | ||||
| 对于多影片重命名,可以用 [ReNamer](http://www.den4b.com/products/renamer) 来批量重命名 | ||||
| 
 | ||||
| 对于多影片重命名,可以用[ReNamer](http://www.den4b.com/products/renamer)来批量重命名<br> | ||||
| 
 | ||||
| --- | ||||
| ### 关于PIL/image.py | ||||
| 暂时无解,可能是网络问题或者pillow模块打包问题,你可以用源码运行(要安装好第一步的模块) | ||||
| 
 | ||||
| ### 拖动法 | ||||
| 针对格式比较奇葩的番号   | ||||
| 影片放在和程序同一目录下,拖动至```AV_Data_Capture.exe```,即可完成刮削和整理 | ||||
| 
 | ||||
| 
 | ||||
| ### 软件会自动把元数据获取成功的电影移动到 JAV_output 文件夹中,根据演员分类,失败的电影移动到failed文件夹中。 | ||||
| 
 | ||||
| ### 把JAV_output文件夹导入到 Emby, Kodi中,等待元数据刷新,完成 | ||||
| 
 | ||||
| ### 关于群晖NAS | ||||
| 开启 SMB,并在 Windows 上挂载为网络磁盘即可使用本软件,也适用于其他 NAS | ||||
| 
 | ||||
| ## 写在后面 | ||||
| 怎么样,看着自己的日本电影被这样完美地管理,是不是感觉成就感爆棚呢? | ||||
| 
 | ||||
| **tg官方电报群:[ 点击进群](https://t.me/joinchat/J54y1g3-a7nxJ_-WS4-KFQ)** | ||||
| 
 | ||||
| ## 6.软件会自动把元数据获取成功的电影移动到JAV_output文件夹中,根据演员分类,失败的电影移动到failed文件夹中。 | ||||
| ## 7.把JAV_output文件夹导入到EMBY,KODI中,等待元数据刷新,完成 | ||||
| ## 8.关于群晖NAS | ||||
| 开启SMB在Windows上挂载为网络磁盘即可使用本软件,也适用于其他NAS | ||||
| ## 9.写在后面 | ||||
| 怎么样,看着自己的日本电影被这样完美地管理,是不是感觉成就感爆棚呢?<br> | ||||
| **tg官方电报群:[ 点击进群](https://t.me/AV_Data_Capture_Official)**<br> | ||||
| 
 | ||||
| 
 | ||||
|  | ||||
| @ -1,229 +0,0 @@ | ||||
| #!/usr/bin/python3 | ||||
| # -*- coding: utf-8 -*- | ||||
| import json | ||||
| import re | ||||
| 
 | ||||
| from lxml import etree | ||||
| 
 | ||||
| from ADC_function import * | ||||
| 
 | ||||
| # import sys | ||||
| # import io | ||||
| # sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True) | ||||
| 
 | ||||
| 
 | ||||
def getTitle(text):
    """Return the movie title text from the element with id="title"."""
    tree = etree.fromstring(text, etree.HTMLParser())
    return tree.xpath('//*[@id="title"]/text()')[0]
| 
 | ||||
| 
 | ||||
def getActor(text):
    """Return the performer (出演者) names as one comma-separated string."""
    # //*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
    tree = etree.fromstring(text, etree.HTMLParser())
    names = tree.xpath(
        "//td[contains(text(),'出演者')]/following-sibling::td/span/a/text()"
    )
    # str() of the list then strip/replace reproduces the original
    # "name1,name2" formatting.
    return str(names).strip(" ['']").replace("', '", ",")
| 
 | ||||
| 
 | ||||
def getStudio(text):
    """Return the maker/studio (メーカー) name.

    Prefers the linked value; falls back to the plain-text cell when the
    maker is not rendered as a link. The `[0]` on an empty xpath result
    raises IndexError, which is the only failure the fallback handles
    (was a bare except).
    """
    tree = etree.fromstring(text, etree.HTMLParser())
    try:
        return tree.xpath(
            "//td[contains(text(),'メーカー')]/following-sibling::td/a/text()"
        )[0]
    except IndexError:
        return tree.xpath(
            "//td[contains(text(),'メーカー')]/following-sibling::td/text()"
        )[0]
| 
 | ||||
| 
 | ||||
def getRuntime(text):
    """Return the runtime (収録時間) as a digits-only string of minutes."""
    tree = etree.fromstring(text, etree.HTMLParser())
    cell = tree.xpath("//td[contains(text(),'収録時間')]/following-sibling::td/text()")[0]
    return re.search(r"\d+", str(cell)).group()
| 
 | ||||
| 
 | ||||
def getLabel(text):
    """Return the series/label (シリーズ) name.

    Prefers the linked value and falls back to the plain-text cell.
    Narrowed from a bare except: the only expected failure is IndexError
    from indexing an empty xpath result.
    """
    tree = etree.fromstring(text, etree.HTMLParser())
    try:
        return tree.xpath(
            "//td[contains(text(),'シリーズ:')]/following-sibling::td/a/text()"
        )[0]
    except IndexError:
        return tree.xpath(
            "//td[contains(text(),'シリーズ:')]/following-sibling::td/text()"
        )[0]
| 
 | ||||
| 
 | ||||
def getNum(text):
    """Return the product number / hinban (品番) from the detail table.

    Prefers the linked value and falls back to the plain-text cell.
    Narrowed from a bare except: indexing an empty xpath result raises
    IndexError, which is the only failure the fallback is for.
    """
    tree = etree.fromstring(text, etree.HTMLParser())
    try:
        return tree.xpath(
            "//td[contains(text(),'品番:')]/following-sibling::td/a/text()"
        )[0]
    except IndexError:
        return tree.xpath(
            "//td[contains(text(),'品番:')]/following-sibling::td/text()"
        )[0]
| 
 | ||||
| 
 | ||||
def getYear(getRelease):
    """Extract the 4-digit year from a release-date string.

    Returns the input unchanged when no 4-digit run is found or the
    input is not a string (preserves the old bare-except behavior, but
    narrowed: re.search raises TypeError on non-strings, and .group()
    on a failed match raises AttributeError).
    """
    try:
        return str(re.search(r"\d{4}", getRelease).group())
    except (TypeError, AttributeError):
        return getRelease
| 
 | ||||
| 
 | ||||
def getRelease(text):
    """Return the sale date (発売日) with any leading newline stripped.

    Prefers the linked value and falls back to the plain-text cell.
    Narrowed from a bare except: `[0]` on an empty xpath result raises
    IndexError.
    """
    tree = etree.fromstring(text, etree.HTMLParser())
    try:
        result = tree.xpath(
            "//td[contains(text(),'発売日:')]/following-sibling::td/a/text()"
        )[0]
    except IndexError:
        result = tree.xpath(
            "//td[contains(text(),'発売日:')]/following-sibling::td/text()"
        )[0]
    return result.lstrip("\n")
| 
 | ||||
| 
 | ||||
def getTag(text):
    """Return the genre (ジャンル) names as a list of strings.

    Bug fix: the original wrapped the first xpath in try/except, but
    xpath() returns [] rather than raising when nothing matches, so the
    plain-text fallback was unreachable. Trigger it on an empty result
    instead, matching the intent of the sibling getters.
    """
    tree = etree.fromstring(text, etree.HTMLParser())
    tags = tree.xpath(
        "//td[contains(text(),'ジャンル:')]/following-sibling::td/a/text()"
    )
    if not tags:
        # genres are occasionally plain text instead of links
        tags = tree.xpath(
            "//td[contains(text(),'ジャンル:')]/following-sibling::td/text()"
        )
    return tags
| 
 | ||||
| 
 | ||||
def getCover(text, number):
    """Return the cover-image URL for *number*.

    Fanza sometimes rewrites "_" in the image element id as the literal
    escape text "\\u005f"; retry with that substitution before giving up.

    Raises:
        ValueError: when no matching image element is found (same
            behavior as before — callers mainly need the picture).
    Narrowed from bare excepts: `[0]` on an empty xpath result raises
    IndexError.
    """
    tree = etree.fromstring(text, etree.HTMLParser())
    cover_number = number
    try:
        return tree.xpath('//*[@id="' + cover_number + '"]/@href')[0]
    except IndexError:
        pass
    if "_" in cover_number:
        cover_number = cover_number.replace("_", r"\u005f")
    try:
        return tree.xpath('//*[@id="' + cover_number + '"]/@href')[0]
    except IndexError:
        # (TODO) handle more edge cases
        raise ValueError("can not find image")
| 
 | ||||
| 
 | ||||
def getDirector(text):
    """Return the director (監督) name.

    Prefers the linked value and falls back to the plain-text cell.
    Narrowed from a bare except: `[0]` on an empty xpath result raises
    IndexError.
    """
    tree = etree.fromstring(text, etree.HTMLParser())
    try:
        return tree.xpath(
            "//td[contains(text(),'監督:')]/following-sibling::td/a/text()"
        )[0]
    except IndexError:
        return tree.xpath(
            "//td[contains(text(),'監督:')]/following-sibling::td/text()"
        )[0]
| 
 | ||||
| 
 | ||||
def getOutline(text):
    """Return the plot summary text, or "" when the page has none.

    Tries the summary div's direct text first, then its <p> children.
    Narrowed from a bare except: `[0]` on an empty xpath result raises
    IndexError; a missing summary is not fatal.
    """
    tree = etree.fromstring(text, etree.HTMLParser())
    try:
        result = str(tree.xpath("//div[@class='mg-b20 lh4']/text()")[0]).replace(
            "\n", ""
        )
        if result == "":
            result = str(
                tree.xpath("//div[@class='mg-b20 lh4']//p/text()")[0]
            ).replace("\n", "")
    except IndexError:
        # (TODO) handle more edge cases
        return ""
    return result
| 
 | ||||
| 
 | ||||
def main(number):
    """Scrape fanza (dmm.co.jp) metadata for *number*; return a JSON string.

    Tries the digital/mono video and anime detail URLs in turn; when the
    work cannot be found or parsed, returns JSON with an empty title so
    the caller can treat the lookup as failed.
    """
    # fanza allows letter + number + underscore; normalize the input here.
    # @note: underscore has only been seen used as in h_test123456789.
    fanza_search_number = number
    # AV_Data_Capture.py getNumber() over-formats the input; restore the h_ prefix.
    if fanza_search_number.startswith("h-"):
        fanza_search_number = fanza_search_number.replace("h-", "h_")

    fanza_search_number = re.sub(r"[^0-9a-zA-Z_]", "", fanza_search_number).lower()

    fanza_urls = [
        "https://www.dmm.co.jp/digital/videoa/-/detail/=/cid=",
        "https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=",
        "https://www.dmm.co.jp/digital/anime/-/detail/=/cid=",
        "https://www.dmm.co.jp/mono/anime/-/detail/=/cid=",
    ]
    chosen_url = ""
    htmlcode = ""  # ensure bound even if the loop body never runs
    for url in fanza_urls:
        chosen_url = url + fanza_search_number
        htmlcode = get_html(chosen_url)
        if "404 Not Found" not in htmlcode:
            break
    if "404 Not Found" in htmlcode:
        return json.dumps({"title": ""})
    try:
        # For some old pages the URL cid does not match the page hinban
        # (e.g. cid=test012 vs hinban test00012), so read the hinban from
        # the page first and pass it to the cover lookup.
        fanza_hinban = getNum(htmlcode)
        data = {
            "title": getTitle(htmlcode).strip(getActor(htmlcode)),
            "studio": getStudio(htmlcode),
            "outline": getOutline(htmlcode),
            "runtime": getRuntime(htmlcode),
            "director": getDirector(htmlcode) if "anime" not in chosen_url else "",
            "actor": getActor(htmlcode) if "anime" not in chosen_url else "",
            "release": getRelease(htmlcode),
            "number": fanza_hinban,
            "cover": getCover(htmlcode, fanza_hinban),
            "imagecut": 1,
            "tag": getTag(htmlcode),
            "label": getLabel(htmlcode),
            "year": getYear(getRelease(htmlcode)),
            "actor_photo": "",
            "website": chosen_url,
            "source": "fanza.py",
        }
    except Exception:
        # Any parsing failure is reported as an empty title rather than
        # crashing the whole capture run (was a bare except).
        data = {
            "title": "",
        }
    return json.dumps(
        data, ensure_ascii=False, sort_keys=True, indent=4, separators=(",", ":")
    )
| 
 | ||||
| 
 | ||||
if __name__ == "__main__":
    # Manual smoke tests kept for reference, e.g.:
    # print(main("DV-1562"))
    # print(main("ipx292"))
    pass
| @ -1,162 +0,0 @@ | ||||
| import re | ||||
| from lxml import etree#need install | ||||
| import json | ||||
| import ADC_function | ||||
| # import sys | ||||
| # import io | ||||
| # sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True) | ||||
| 
 | ||||
def getTitle(htmlcode):
    """Return the work title from an fc2club page, FC2 number removed."""
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    raw = str(tree.xpath('/html/body/div[2]/div/div[1]/h3/text()')).strip(" ['']")
    # drop the leading "xx2-NNNNN" token and the first space after it
    return str(re.sub('\D{2}2-\d+', '', raw)).replace(' ', '', 1)
def getActor(htmlcode):
    """Return the performer name from an fc2club page, or '' on failure.

    Narrowed from a bare except (which also swallowed KeyboardInterrupt)
    to Exception; any parse failure still yields ''.
    """
    try:
        tree = etree.fromstring(htmlcode, etree.HTMLParser())
        return str(tree.xpath('/html/body/div[2]/div/div[1]/h5[5]/a/text()')).strip(" ['']")
    except Exception:
        return ''
def getStudio(htmlcode):
    """Return the seller/studio name from an fc2club page."""
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    return str(tree.xpath('/html/body/div[2]/div/div[1]/h5[3]/a[1]/text()')).strip(" ['']")
def getNum(htmlcode):
    """Return the work number from an fc2club page."""
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    return str(tree.xpath('/html/body/div[5]/div[1]/div[2]/p[1]/span[2]/text()')).strip(" ['']")
def getRelease(htmlcode2):
    """Return the sale-date text from an FC2 article page."""
    tree = etree.fromstring(htmlcode2, etree.HTMLParser())
    return str(
        tree.xpath('//*[@id="container"]/div[1]/div/article/section[1]/div/div[2]/dl/dd[4]/text()')
    ).strip(" ['']")
def getCover(htmlcode, number, htmlcode2):
    """Return the cover URL, preferring the FC2 article page image and
    falling back to the fc2club slider image when it is missing."""
    article = etree.fromstring(htmlcode2, etree.HTMLParser())
    src = str(
        article.xpath('//*[@id="container"]/div[1]/div/article/section[1]/div/div[1]/a/img/@src')
    ).strip(" ['']")
    if src == '':
        club = etree.fromstring(htmlcode, etree.HTMLParser())
        fallback = str(club.xpath('//*[@id="slider"]/ul[1]/li[1]/img/@src')).strip(" ['']")
        return 'https://fc2club.com' + fallback
    return 'http:' + src
def getOutline(htmlcode2):
    """Return the description text with newlines/quotes cleaned up."""
    tree = etree.fromstring(htmlcode2, etree.HTMLParser())
    raw = str(
        tree.xpath('/html/body/div[1]/div[2]/div[2]/div[1]/div/article/section[4]/p/text()')
    ).strip(" ['']")
    # strip escaped newlines and quote characters left by the str(list) dump
    return (
        raw.replace("\\n", '', 10000)
        .replace("'", '', 10000)
        .replace(', ,', '')
        .strip('  ')
        .replace('。,', ',')
    )
def getTag(htmlcode):
    """Return the tag names as one cleaned, comma-joined string."""
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    raw = str(tree.xpath('/html/body/div[2]/div/div[1]/h5[4]/a/text()'))
    return raw.strip(" ['']").replace("'", '').replace(' ', '')
def getYear(release):
    """Return the 4-digit year found in *release*, or '' when absent.

    Narrowed from a bare except: re.search raises TypeError on
    non-strings and .group() raises AttributeError on a failed match.
    """
    try:
        return re.search(r'\d{4}', release).group()
    except (TypeError, AttributeError):
        return ''
| 
 | ||||
def getTitle_fc2com(htmlcode):
    """Return the title from an adult.contents.fc2.com article page."""
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    return tree.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/h3/text()')[0]
def getActor_fc2com(htmlcode):
    """Return the performer name from an fc2.com article page, or ''.

    Narrowed from a bare except to Exception; [0] on an empty xpath
    result (IndexError) is the usual failure here.
    """
    try:
        tree = etree.fromstring(htmlcode, etree.HTMLParser())
        return tree.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/ul/li[3]/a/text()')[0]
    except Exception:
        return ''
def getStudio_fc2com(htmlcode):
    """Return the seller name from an fc2.com article page, or ''.

    Narrowed from a bare except to Exception; any parse failure still
    yields ''. NOTE(review): uses the same xpath as getActor_fc2com —
    presumably intentional on fc2's layout, but worth confirming.
    """
    try:
        tree = etree.fromstring(htmlcode, etree.HTMLParser())
        return str(
            tree.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/ul/li[3]/a/text()')
        ).strip(" ['']")
    except Exception:
        return ''
def getNum_fc2com(htmlcode):
    """Return the work number parsed from an fc2.com page."""
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    return str(tree.xpath('/html/body/div[5]/div[1]/div[2]/p[1]/span[2]/text()')).strip(" ['']")
def getRelease_fc2com(htmlcode2):
    """Return the sale-date text from an fc2.com article page."""
    tree = etree.fromstring(htmlcode2, etree.HTMLParser())
    return str(
        tree.xpath('//*[@id="container"]/div[1]/div/article/section[1]/div/div[2]/dl/dd[4]/text()')
    ).strip(" ['']")
def getCover_fc2com(htmlcode2):
    """Return the cover-image URL from an fc2.com article page."""
    tree = etree.fromstring(htmlcode2, etree.HTMLParser())
    src = str(
        tree.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[1]/span/img/@src')
    ).strip(" ['']")
    return 'http:' + src
def getOutline_fc2com(htmlcode2):
    """Return the description text with newlines/quotes cleaned up."""
    tree = etree.fromstring(htmlcode2, etree.HTMLParser())
    raw = str(tree.xpath('/html/body/div/text()')).strip(" ['']")
    return (
        raw.replace("\\n", '', 10000)
        .replace("'", '', 10000)
        .replace(', ,', '')
        .strip('  ')
        .replace('。,', ',')
    )
def getTag_fc2com(number):
    """Fetch the tag names for *number* from the FC2 tag API."""
    api_url = 'http://adult.contents.fc2.com/api/v4/article/' + number + '/tag?'
    # decode the \uXXXX escapes the API returns inline
    raw = str(bytes(ADC_function.get_html(api_url), 'utf-8').decode('unicode-escape'))
    return re.findall('"tag":"(.*?)"', raw)
def getYear_fc2com(release):
    """Return the first 4-digit year found in *release*, or '' if none.

    Uses a raw regex string (the old '\\d{4}' literal triggers an invalid
    escape warning) and narrows the bare except to the actual failure mode.
    """
    try:
        match = re.search(r'\d{4}', release)
        return match.group() if match else ''
    except TypeError:
        # release may be None / non-string when upstream scraping failed
        return ''
| 
 | ||||
def main(number):
    """Fetch FC2 metadata for *number*, returning a pretty-printed JSON string.

    Primary source is the fc2club mirror; if that yields no title, retry
    against adult.contents.fc2.com using the *_fc2com scrapers.  On any
    error a stub {"title": ""} is serialised instead of raising.
    """
    try:
        htmlcode2 = ADC_function.get_html('https://adult.contents.fc2.com/article/'+number+'/')
        htmlcode = ADC_function.get_html('https://fc2club.com//html/FC2-' + number + '.html')
        actor = getActor(htmlcode)
        if getActor(htmlcode) == '':
            # placeholder name ("FC2 series") when no performer is listed
            actor = 'FC2系列'
        dic = {
            'title':    getTitle(htmlcode),
            'studio':   getStudio(htmlcode),
            'year': '',#str(re.search('\d{4}',getRelease(number)).group()),
            'outline':  '',#getOutline(htmlcode2),
            # NOTE(review): a year is stored under 'runtime' here — confirm intent
            'runtime':  getYear(getRelease(htmlcode)),
            'director': getStudio(htmlcode),
            'actor':    actor,
            # NOTE(review): getRelease() receives the id string, not page HTML — verify
            'release':  getRelease(number),
            'number':  'FC2-'+number,
            'label': '',
            'cover':    getCover(htmlcode,number,htmlcode2),
            'imagecut': 0,
            'tag':      getTag(htmlcode),
            'actor_photo':'',
            'website':  'https://fc2club.com//html/FC2-' + number + '.html',
            'source':'https://fc2club.com//html/FC2-' + number + '.html',
        }
        if dic['title'] == '':
            # fc2club gave nothing: fall back to fc2.com directly
            # (NOTE(review): the wei6H cookie presumably bypasses a gate page — confirm)
            htmlcode2 = ADC_function.get_html('https://adult.contents.fc2.com/article/' + number + '/',cookies={'wei6H':'1'})
            actor = getActor(htmlcode)
            if getActor(htmlcode) == '':
                actor = 'FC2系列'
            dic = {
                'title': getTitle_fc2com(htmlcode2),
                'studio': getStudio_fc2com(htmlcode2),
                'year': '',  # str(re.search('\d{4}',getRelease(number)).group()),
                'outline': getOutline_fc2com(htmlcode2),
                # NOTE(review): year stored as 'runtime' again — confirm intent
                'runtime': getYear_fc2com(getRelease(htmlcode2)),
                'director': getStudio_fc2com(htmlcode2),
                'actor': actor,
                # NOTE(review): passes the id, not page HTML, to getRelease_fc2com — verify
                'release': getRelease_fc2com(number),
                'number': 'FC2-' + number,
                'cover': getCover_fc2com(htmlcode2),
                'imagecut': 0,
                'tag': getTag_fc2com(number),
                'label': '',
                'actor_photo': '',
                'website': 'http://adult.contents.fc2.com/article/' + number + '/',
                'source': 'http://adult.contents.fc2.com/article/' + number + '/',
            }
    except Exception as e:
        # (TODO) better handle this
        # print(e)
        dic = {"title": ""}
    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'),)#.encode('UTF-8')
    return js
| 
 | ||||
| 
 | ||||
| #print(main('1252953')) | ||||
| @ -1,123 +0,0 @@ | ||||
| import re | ||||
| from lxml import etree | ||||
| import json | ||||
| from bs4 import BeautifulSoup | ||||
| from ADC_function import * | ||||
| # import sys | ||||
| # import io | ||||
| # sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True) | ||||
| 
 | ||||
def getTitle(a):
    """Return the detail-page title text (first <h2><strong> match)."""
    tree = etree.fromstring(a, etree.HTMLParser())
    return tree.xpath("/html/body/section/div/h2/strong/text()")[0]
def getActor(a):
    """Collect performer names from the 演員 panel as a comma-separated string."""
    tree = etree.fromstring(a, etree.HTMLParser())
    plain = str(tree.xpath('//strong[contains(text(),"演員")]/../following-sibling::span/text()')).strip(" ['']")
    linked = str(tree.xpath('//strong[contains(text(),"演員")]/../following-sibling::span/a/text()')).strip(" ['']")
    merged = str(plain + linked).strip('+')
    return merged.replace(",\\xa0", "").replace("'", "").replace(' ', '').replace(',,', '').lstrip(',').replace(',', ', ')
def getActorPhoto(actor):
    """Map each comma-separated actor name to an empty photo-URL placeholder."""
    return {name: '' for name in actor.split(',')}
def getStudio(a):
    """Return the studio (片商) field text, links and plain text combined."""
    tree = etree.fromstring(a, etree.HTMLParser())
    plain = str(tree.xpath('//strong[contains(text(),"片商")]/../following-sibling::span/text()')).strip(" ['']")
    linked = str(tree.xpath('//strong[contains(text(),"片商")]/../following-sibling::span/a/text()')).strip(" ['']")
    return str(plain + linked).strip('+').replace("', '", '').replace('"', '')
def getRuntime(a):
    """Return the runtime (時長) field with the trailing unit characters stripped."""
    tree = etree.fromstring(a, etree.HTMLParser())
    plain = str(tree.xpath('//strong[contains(text(),"時長")]/../following-sibling::span/text()')).strip(" ['']")
    linked = str(tree.xpath('//strong[contains(text(),"時長")]/../following-sibling::span/a/text()')).strip(" ['']")
    # rstrip('mi') strips the character set {m,i} — presumably removing a
    # 'mi'/'min' suffix; NOTE(review): confirm no digit-adjacent loss
    return str(plain + linked).strip('+').rstrip('mi')
def getLabel(a):
    """Return the series/label (系列) field text."""
    tree = etree.fromstring(a, etree.HTMLParser())
    plain = str(tree.xpath('//strong[contains(text(),"系列")]/../following-sibling::span/text()')).strip(" ['']")
    linked = str(tree.xpath('//strong[contains(text(),"系列")]/../following-sibling::span/a/text()')).strip(" ['']")
    return str(plain + linked).strip('+').replace("', '", '').replace('"', '')
def getNum(a):
    """Return the product id (番號) field; linked part is placed first."""
    tree = etree.fromstring(a, etree.HTMLParser())
    plain = str(tree.xpath('//strong[contains(text(),"番號")]/../following-sibling::span/text()')).strip(" ['']")
    linked = str(tree.xpath('//strong[contains(text(),"番號")]/../following-sibling::span/a/text()')).strip(" ['']")
    return str(linked + plain).strip('+')
def getYear(getRelease):
    """Return the 4-digit year inside the release string, else the input as-is.

    NOTE(review): the parameter shadows the sibling getRelease() function; it
    is kept to preserve the (keyword) call interface.
    """
    try:
        # raw string avoids the invalid-escape warning of '\d{4}'
        return str(re.search(r'\d{4}', getRelease).group())
    except Exception:
        # no match -> re.search returned None -> AttributeError; fall back
        return getRelease
def getRelease(a):
    """Return the release-date (時間) field text."""
    tree = etree.fromstring(a, etree.HTMLParser())
    plain = str(tree.xpath('//strong[contains(text(),"時間")]/../following-sibling::span/text()')).strip(" ['']")
    linked = str(tree.xpath('//strong[contains(text(),"時間")]/../following-sibling::span/a/text()')).strip(" ['']")
    return str(plain + linked).strip('+')
def getTag(a):
    """Return the category (类别) field as a cleaned comma-separated string."""
    tree = etree.fromstring(a, etree.HTMLParser())
    plain = str(tree.xpath('//strong[contains(text(),"类别")]/../following-sibling::span/text()')).strip(" ['']")
    linked = str(tree.xpath('//strong[contains(text(),"类别")]/../following-sibling::span/a/text()')).strip(" ['']")
    merged = str(plain + linked).strip('+')
    return merged.replace(",\\xa0", "").replace("'", "").replace(' ', '').replace(',,', '').lstrip(',')
def getCover_small(a, index=0):
    """Return the search-result thumbnail at *index*.

    javdb can return several hits for one query, so the caller passes the
    index of the matching candidate instead of blindly taking the first.
    """
    tree = etree.fromstring(a, etree.HTMLParser())
    src = tree.xpath("//div[@class='item-image fix-scale-cover']/img/@src")[index]
    if 'https' not in src:
        src = 'https:' + src
    return src
def getCover(htmlcode):
    """Return the full-size cover image URL from the detail page."""
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    nodes = tree.xpath("//div[@class='column column-video-cover']/a/img/@src")
    return str(nodes).strip(" ['']")
def getDirector(a):
    """Return the director (導演) field text."""
    tree = etree.fromstring(a, etree.HTMLParser())
    plain = str(tree.xpath('//strong[contains(text(),"導演")]/../following-sibling::span/text()')).strip(" ['']")
    linked = str(tree.xpath('//strong[contains(text(),"導演")]/../following-sibling::span/a/text()')).strip(" ['']")
    return str(plain + linked).strip('+').replace("', '", '').replace('"', '')
def getOutline(htmlcode):
    """Return the first paragraph of the introduction section."""
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    nodes = tree.xpath('//*[@id="introduction"]/dd/p[1]/text()')
    return str(nodes).strip(" ['']")
def main(number):
    """Search javdb.com for *number* and return its metadata as a JSON string.

    On any failure (no match, network error, parse error) a stub
    {"title": ""} is serialised instead of raising.
    """
    try:
        number = number.upper()
        query_result = get_html('https://javdb.com/search?q=' + number + '&f=all')
        html = etree.fromstring(query_result, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
        # javdb sometime returns multiple results,
        # and the first elememt maybe not the one we are looking for
        # iterate all candidates and find the match one
        urls = html.xpath('//*[@id="videos"]/div/div/a/@href')
        ids =html.xpath('//*[@id="videos"]/div/div/a/div[contains(@class, "uid")]/text()')
        # ValueError here (number absent from ids) is swallowed by the except below
        correct_url = urls[ids.index(number)]
        detail_page = get_html('https://javdb.com' + correct_url)
        dic = {
            'actor': getActor(detail_page),
            'title': getTitle(detail_page),
            'studio': getStudio(detail_page),
            'outline': getOutline(detail_page),
            'runtime': getRuntime(detail_page),
            'director': getDirector(detail_page),
            'release': getRelease(detail_page),
            'number': getNum(detail_page),
            'cover': getCover(detail_page),
            'cover_small': getCover_small(query_result, index=ids.index(number)),
            'imagecut': 3,
            'tag': getTag(detail_page),
            'label': getLabel(detail_page),
            'year': getYear(getRelease(detail_page)),  # str(re.search('\d{4}',getRelease(a)).group()),
            'actor_photo': getActorPhoto(getActor(detail_page)),
            'website': 'https://javdb.com' + correct_url,
            'source': 'javdb.py',
        }
    except Exception as e:
        # print(e)
        dic = {"title": ""}
    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )  # .encode('UTF-8')
    return js
| 
 | ||||
| # main('DV-1562') | ||||
| # input("[+][+]Press enter key exit, you can check the error messge before you exit.\n[+][+]按回车键结束,你可以在结束之前查看和错误信息。") | ||||
| #print(main('ipx-292')) | ||||
| @ -1,41 +0,0 @@ | ||||
| /Volumes/Adult/Files/ノ瀬アメリ/Tokyo Hot N0646.avi | ||||
| /Volumes/Adult/Files/ノ瀬アメリ/MKBD_S03-MaRieS.mp4 | ||||
| /Volumes/192.168.2.100/Adult/Files/Aki Sasaki Megapack/HODV-21299.mkv | ||||
| /Volumes/Adult/Files/[Tokyo-Hot] [n1180] 美人秘書3穴串刺奉仕残業 (中井綾香 Ayaka Nakai)/(Tokyo-Hot)(n1180)美人秘書3穴串刺奉仕残業 中井綾香.mp4 | ||||
| /mcdv47.avi | ||||
| /mcdv-47.avi | ||||
| /mcdv-047.mp4 | ||||
| /mcdv047.mp4 | ||||
| /mcdv0047.mp4 | ||||
| /1pondo-070409_621.mp4 | ||||
| /Volumes/Adult/Files/Kirara Asuka (@明日花キララ) FHD Pack Vol#1(181222)@RUNBKK/No-Watermarked/HOBD00015.FHD2.wmv | ||||
| /Volumes/Adult/Files/(1.18TB) Julia movie pack collection Part 1/720p/RBD-406_1.mp4 | ||||
| /Volumes/Adult/Files/(1.18TB) Julia movie pack collection Part 1/720p/MDYD-664B.mp4 | ||||
| /Volumes/Adult/Files/107NTTR-037A.mp4 | ||||
| /Volumes/Adult/Files/Yua.Mikami-PML/SNIS-986 国民的アイドル アドレナリン大爆発!禁欲1ヶ月後の性欲剥き出し焦らされトランスFUCK 三上悠亜【桃花族】.mp4 | ||||
| /Volumes/Adult/Files/(1.18TB) Julia movie pack collection Part 2/FHD/UPSM-109_2.mkv | ||||
| /Volumes/Adult/Files/Kirara Asuka (@明日花キララ) FHD Pack Vol#2(181231)@RUNBKK/No-Watermarked/PPT003.SD3.wmv | ||||
| /Volumes/Adult/Files/波多野结衣/THE波多野結衣 ぶっかけ50連発! CD1.wmv | ||||
| /Volumes/Adult/Files/波多野结衣/欲しがり 後編 波多野結衣.wmv | ||||
| /Volumes/Adult/Files/波多野结衣/欲しがり 前編 波多野結衣.wmv | ||||
| /Volumes/Adult/Files/波多野结衣/加勒比 062212-055 夫の目の前で妻が ~元上司に縛られて~波多野結衣~.rmvb | ||||
| /Volumes/Adult/Files/波多野结衣/022213-271-carib-whole_s.mp4 | ||||
| /Volumes/Adult/Files/SKYHD-001~010/SKYHD-009_H265.mkv | ||||
| /Volumes/Adult/Files/大桥步兵合集/LAFBD-41.LaForet.Girl.41.angel.and.devil.Miku.Ohashi.2015.Bluray.1080p.x264.ac3-MTeam.mkv | ||||
| /Volumes/Adult/Files/大桥步兵合集/032015_161-caribpr-high.mp4 | ||||
| /Volumes/Adult/Files/桃谷绘里香(桃谷エリカ) 所有作品集合/118ppt00016hhb2.mkv | ||||
| /Volumes/Adult/Files/tia/soe935C.HD.wmv | ||||
| /Volumes/Adult/Files/SKYHD-011~020/SKYHD-020_H265.mkv | ||||
| /Volumes/Adult/Files/RION(りおん).Utsunomiya.Shion.宇都宮しをん(うつのみやしをん)/VR/sivr00008_E.mp4 | ||||
| /Volumes/Adult/Files/RION(りおん).Utsunomiya.Shion.宇都宮しをん(うつのみやしをん)/DMM.Video/onsd00899hhb3.mp4 | ||||
| /Volumes/Adult/Files/Rating Top 30 JAV pack/SHKD-744 営業課長の湿ったパンスト 里美ゆりあ.mp4 | ||||
| /Volumes/Adult/Files/Rating Top 30 JAV pack/ABP-627 裏・鈴村あいり-鈴村あいりのオトナの激情SEX4本番 鈴村あいり.MP4 | ||||
| /Volumes/Adult/Files/Rating Top 30 JAV pack/20 ABP-408 上原瑞穂/上原瑞穂 ABP-408 无码流出片段/[ThZu.Cc]20150909164411.m2ts | ||||
| /Volumes/Adult/Files/Caribbean-101717-520-HD/100917-515/100917-515-carib-1080p.mp4 | ||||
| /Volumes/Adult/Files/ノ瀬アメリ/20081105栗栖エリカ - Sky Angel Blue 10 天舞超絕美少女天使降臨(skyhd010)(中文字幕).avi | ||||
| /Volumes/Adult/Files/ノ瀬アメリ/一ノ瀬アメリ~加勒比 VERY SEXY.wmv | ||||
| /Volumes/Adult/Files/ノ瀬アメリ/20101202一ノ瀬アメリ - 東京ブルドック05(inu006).avi | ||||
| /Volumes/Adult/Files/ノ瀬アメリ/Sky Angel Vol 80 - CD2.mp4 | ||||
| /Volumes/Adult/Files/Mika Sumire すみれ美香/Caribbean-091818-755.mp4 | ||||
| /Volumes/Adult/Files/Takizawa Rola/[HD]abp-031C.wmv | ||||
| /Volumes/Adult/Files/Takizawa Rola/ABP-013HDA.wmv | ||||
| @ -1,51 +0,0 @@ | ||||
| /Volumes/192.168.2.100/Adult/Files/Aki Sasaki Megapack/HODV-21222.mkv | ||||
| /Volumes/Adult/Files/ノ瀬アメリ/Tokyo Hot N0646.avi | ||||
| /Volumes/Adult/Files/ノ瀬アメリ/MKBD_S03-MaRieS.mp4 | ||||
| /Volumes/192.168.2.100/Adult/Files/RION(りおん).Utsunomiya.Shion.宇都宮しをん(うつのみやしをん)/DMM.Video/onsd00899hhb3.mp4 | ||||
| /Volumes/192.168.2.100/Adult/Files/Rating Top 30 JAV pack/IPTD-999-1 彼女の姉貴とイケナイ関係 Rio.wmv | ||||
| /Volumes/192.168.2.100/Adult/Files/Rating Top 30 JAV pack/IPTD-999A 彼女の姉貴とイケナイ関係 Rio.wmv | ||||
| /Volumes/192.168.2.100/Adult/Files/Rating Top 30 JAV pack/IPTD-999-A 彼女の姉貴とイケナイ関係 Rio.wmv | ||||
| /Volumes/192.168.2.100/Adult/Files/Rating Top 30 JAV pack/IPTD-999-C 彼女の姉貴とイケナイ関係 Rio.wmv | ||||
| /Volumes/192.168.2.100/Adult/Files/Rating Top 30 JAV pack/IPTD-999-B 彼女の姉貴とイケナイ関係 Rio.wmv | ||||
| /Volumes/192.168.2.100/Adult/Files/tia/soe935C.HD.wmv | ||||
| /Volumes/192.168.2.100/Adult/Files/tia/soe935B.HD.wmv | ||||
| /Volumes/192.168.2.100/Adult/Files/tia/soe935A.HD.wmv | ||||
| /Volumes/192.168.2.100/Adult/Files/tia/soe935D.HD.wmv | ||||
| /Volumes/Adult/Files/大桥步兵合集/LAFBD-41.LaForet.Girl.41.angel.and.devil.Miku.Ohashi.2015.Bluray.1080p.x264.ac3-MTeam.mkv | ||||
| /Volumes/Adult/Files/[Tokyo-Hot] [n1180] 美人秘書3穴串刺奉仕残業 (中井綾香 Ayaka Nakai)/(Tokyo-Hot)(n1180)美人秘書3穴串刺奉仕残業 中井綾香.mp4 | ||||
| /mcdv47.avi | ||||
| /mcdv-47.avi | ||||
| /mcdv-047.mp4 | ||||
| /mcdv047.mp4 | ||||
| /mcdv0047.mp4 | ||||
| /1pondo-070409_621.mp4 | ||||
| /Volumes/Adult/Files/Kirara Asuka (@明日花キララ) FHD Pack Vol#1(181222)@RUNBKK/No-Watermarked/HOBD00015.FHD2.wmv | ||||
| /Volumes/Adult/Files/(1.18TB) Julia movie pack collection Part 1/720p/RBD-406_1.mp4 | ||||
| /Volumes/Adult/Files/(1.18TB) Julia movie pack collection Part 1/720p/MDYD-664B.mp4 | ||||
| /Volumes/Adult/Files/107NTTR-037A.mp4 | ||||
| /Volumes/Adult/Files/Yua.Mikami-PML/SNIS-986 国民的アイドル アドレナリン大爆発!禁欲1ヶ月後の性欲剥き出し焦らされトランスFUCK 三上悠亜【桃花族】.mp4 | ||||
| /Volumes/Adult/Files/(1.18TB) Julia movie pack collection Part 2/FHD/UPSM-109_2.mkv | ||||
| /Volumes/Adult/Files/Kirara Asuka (@明日花キララ) FHD Pack Vol#2(181231)@RUNBKK/No-Watermarked/PPT003.SD3.wmv | ||||
| /Volumes/Adult/Files/波多野结衣/THE波多野結衣 ぶっかけ50連発! CD1.wmv | ||||
| /Volumes/Adult/Files/波多野结衣/欲しがり 後編 波多野結衣.wmv | ||||
| /Volumes/Adult/Files/波多野结衣/欲しがり 前編 波多野結衣.wmv | ||||
| /Volumes/Adult/Files/波多野结衣/加勒比 062212-055 夫の目の前で妻が ~元上司に縛られて~波多野結衣~.rmvb | ||||
| /Volumes/Adult/Files/波多野结衣/022213-271-carib-whole_s.mp4 | ||||
| /Volumes/Adult/Files/SKYHD-001~010/SKYHD-009_H265.mkv | ||||
| /Volumes/Adult/Files/大桥步兵合集/LAFBD-41.LaForet.Girl.41.angel.and.devil.Miku.Ohashi.2015.Bluray.1080p.x264.ac3-MTeam.mkv | ||||
| /Volumes/Adult/Files/大桥步兵合集/032015_161-caribpr-high.mp4 | ||||
| /Volumes/Adult/Files/桃谷绘里香(桃谷エリカ) 所有作品集合/118ppt00016hhb2.mkv | ||||
| /Volumes/Adult/Files/SKYHD-011~020/SKYHD-020_H265.mkv | ||||
| /Volumes/Adult/Files/RION(りおん).Utsunomiya.Shion.宇都宮しをん(うつのみやしをん)/VR/sivr00008_E.mp4 | ||||
| /Volumes/Adult/Files/RION(りおん).Utsunomiya.Shion.宇都宮しをん(うつのみやしをん)/DMM.Video/onsd00899hhb3.mp4 | ||||
| /Volumes/Adult/Files/Rating Top 30 JAV pack/SHKD-744 営業課長の湿ったパンスト 里美ゆりあ.mp4 | ||||
| /Volumes/Adult/Files/Rating Top 30 JAV pack/ABP-627 裏・鈴村あいり-鈴村あいりのオトナの激情SEX4本番 鈴村あいり.MP4 | ||||
| /Volumes/Adult/Files/Rating Top 30 JAV pack/20 ABP-408 上原瑞穂/上原瑞穂 ABP-408 无码流出片段/[ThZu.Cc]20150909164411.m2ts | ||||
| /Volumes/Adult/Files/Caribbean-101717-520-HD/100917-515/100917-515-carib-1080p.mp4 | ||||
| /Volumes/Adult/Files/ノ瀬アメリ/20081105栗栖エリカ - Sky Angel Blue 10 天舞超絕美少女天使降臨(skyhd010)(中文字幕).avi | ||||
| /Volumes/Adult/Files/ノ瀬アメリ/一ノ瀬アメリ~加勒比 VERY SEXY.wmv | ||||
| /Volumes/Adult/Files/ノ瀬アメリ/20101202一ノ瀬アメリ - 東京ブルドック05(inu006).avi | ||||
| /Volumes/Adult/Files/ノ瀬アメリ/Sky Angel Vol 80 - CD2.mp4 | ||||
| /Volumes/Adult/Files/Mika Sumire すみれ美香/Caribbean-091818-755.mp4 | ||||
| /Volumes/Adult/Files/Takizawa Rola/[HD]abp-031C.wmv | ||||
| /Volumes/Adult/Files/Takizawa Rola/ABP-013HDA.wmv | ||||
| @ -1,50 +0,0 @@ | ||||
| /Volumes/Adult/Files/Kirara Asuka (@明日花キララ) FHD Pack Vol#1(181222)@RUNBKK/No-Watermarked/HOBD00015.FHD2.wmv | ||||
| /1pondo-070409_621.mp4 | ||||
| /Volumes/Adult/Files/107NTTR-037.mp4 | ||||
| /Volumes/Adult/Files/107NTTR-037A.mp4 | ||||
| /Volumes/Adult/Files/Yua.Mikami-PML/TEK-097 ふたりは無敵.wmv | ||||
| /Volumes/Adult/Files/Yua.Mikami-PML/SNIS-986 国民的アイドル アドレナリン大爆発!禁欲1ヶ月後の性欲剥き出し焦らされトランスFUCK 三上悠亜【桃花族】.mp4 | ||||
| /Volumes/Adult/Files/Yua.Mikami-PML/SSNI-030 三上悠亜ファン感謝祭 国民的アイドル×一般ユーザー20人‘ガチファンとSEX解禁’ハメまくりスペシャル【桃花族】.mp4 | ||||
| /Volumes/Adult/Files/(1.18TB) Julia movie pack collection Part 2/FHD/MIDD-893A.mkv | ||||
| /Volumes/Adult/Files/(1.18TB) Julia movie pack collection Part 2/FHD/UPSM-109_2.mkv | ||||
| /Volumes/Adult/Files/Kirara Asuka (@明日花キララ) FHD Pack Vol#2(181231)@RUNBKK/No-Watermarked/PPT003.SD3.wmv | ||||
| /Volumes/Adult/Files/波多野结衣/THE波多野結衣 ぶっかけ50連発! CD1.wmv | ||||
| /Volumes/Adult/Files/波多野结衣/欲しがり 後編 波多野結衣.wmv | ||||
| /Volumes/Adult/Files/波多野结衣/欲しがり 前編 波多野結衣.wmv | ||||
| /Volumes/Adult/Files/波多野结衣/加勒比 062212-055 夫の目の前で妻が ~元上司に縛られて~波多野結衣~.rmvb | ||||
| /Volumes/Adult/Files/波多野结衣/022213-271-carib-whole_s.mp4 | ||||
| /Volumes/Adult/Files/桜木凛 Rin Sakuragi FHD Collection Pack Vol/BBI-183.wmv | ||||
| /Volumes/Adult/Files/NOP-019 芭蕾教室 水嶋あずみ/NOP019B.HD.wmv | ||||
| /Volumes/Adult/Files/一ノ瀬アメリ part2/栗栖エリカ/20081105栗栖エリカ - Sky Angel Blue 10 天舞超絕美少女天使降臨(skyhd010)(中文字幕).avi | ||||
| /Volumes/Adult/Files/一ノ瀬アメリ part2/Max Girls/Max Girls 24(xv804)伊東遥,Rio,小沢アリス,葉月しおり,一ノ瀬アメリ,ひなた結衣,藤崎りお.avi | ||||
| /Volumes/Adult/Files/一ノ瀬アメリ part2/ノ瀬アメリAmeri Ichinose/20091127一ノ瀬アメリ - 一見面就做愛(xv801).avi | ||||
| /Volumes/Adult/Files/Aki Sasaki Megapack/MSTG-003.mkv | ||||
| /Volumes/Adult/Files/SKYHD-001~010/SKYHD-009_H265.mkv | ||||
| /Volumes/Adult/Files/大桥步兵合集/LAFBD-41.LaForet.Girl.41.angel.and.devil.Miku.Ohashi.2015.Bluray.1080p.x264.ac3-MTeam.mkv | ||||
| /Volumes/Adult/Files/大桥步兵合集/032015_161-caribpr-high.mp4 | ||||
| /Volumes/Adult/Files/桃谷绘里香(桃谷エリカ) 所有作品集合/(PRESTIGE)(ABP-171)彼女のお姉さんは、誘惑ヤリたがり娘。桃谷エリカ.wmv | ||||
| /Volumes/Adult/Files/桃谷绘里香(桃谷エリカ) 所有作品集合/(PRESTIGE)(ABP-145)濃密な接吻と欲情ベロキス性交 04 桃谷エリカ.wmv | ||||
| /Volumes/Adult/Files/桃谷绘里香(桃谷エリカ) 所有作品集合/118ppt00016hhb2.mkv | ||||
| /Volumes/Adult/Files/tia/soe935C.HD.wmv | ||||
| /Volumes/Adult/Files/SKYHD-011~020/SKYHD-020_H265.mkv | ||||
| /Volumes/Adult/Files/sakumomo1203-PML/IDBD-795 ももに夢中 2018年日本人にもっとも愛された女優桜空ももPREMIUM BOX8時間BEST.mp4 | ||||
| /Volumes/Adult/Files/sakumomo1203-PML/IDBD-768 Gカップグラビアアイドル桜空もも初ベスト 原石 2【桃花族】.mp4 | ||||
| /Volumes/Adult/Files/RION(りおん).Utsunomiya.Shion.宇都宮しをん(うつのみやしをん)/VR/sivr00008_E.mp4 | ||||
| /Volumes/Adult/Files/RION(りおん).Utsunomiya.Shion.宇都宮しをん(うつのみやしをん)/DMM.Video/onsd00899hhb3.mp4 | ||||
| /Volumes/Adult/Files/Rating Top 30 JAV pack/SHKD-744 営業課長の湿ったパンスト 里美ゆりあ.mp4 | ||||
| /Volumes/Adult/Files/Rating Top 30 JAV pack/ABP-627 裏・鈴村あいり-鈴村あいりのオトナの激情SEX4本番 鈴村あいり.MP4 | ||||
| /Volumes/Adult/Files/Rating Top 30 JAV pack/20 ABP-408 上原瑞穂/上原瑞穂 ABP-408 无码流出片段/[ThZu.Cc]20150909164411.m2ts | ||||
| /Volumes/Adult/Files/Caribbean-101717-520-HD/100917-515/100917-515-carib-1080p.mp4 | ||||
| /Volumes/Adult/Files/Kirara Asuka (@明日花キララ) FHD Pack Vol#3(190119)@RUNBKK/No-Watermarked/SOE976.FHD3.wmv | ||||
| /Volumes/Adult/Files/(1.18TB) Julia movie pack collection Part 1/720p/RBD-406_1.mp4 | ||||
| /Volumes/Adult/Files/(1.18TB) Julia movie pack collection Part 1/720p/MDYD-664B.mp4 | ||||
| /Volumes/Adult/Files/ノ瀬アメリ/20081105栗栖エリカ - Sky Angel Blue 10 天舞超絕美少女天使降臨(skyhd010)(中文字幕).avi | ||||
| /Volumes/Adult/Files/ノ瀬アメリ/一ノ瀬アメリ~加勒比 VERY SEXY.wmv | ||||
| /Volumes/Adult/Files/ノ瀬アメリ/20101202一ノ瀬アメリ - 東京ブルドック05(inu006).avi | ||||
| /Volumes/Adult/Files/ノ瀬アメリ/Sky Angel Vol 80 - CD2.mp4 | ||||
| /Volumes/Adult/Files/ノ瀬アメリ/20100226一ノ瀬アメリ - OL Style 制服(xv827).avi | ||||
| /Volumes/Adult/Files/Mika Sumire すみれ美香/Caribbean-091818-755.mp4 | ||||
| /Volumes/Adult/Files/[Tokyo-Hot] [n1180] 美人秘書3穴串刺奉仕残業 (中井綾香 Ayaka Nakai)/(Tokyo-Hot)(n1180)美人秘書3穴串刺奉仕残業 中井綾香.mp4 | ||||
| /Volumes/Adult/Files/Takizawa Rola/[HD]abp-031C.wmv | ||||
| /Volumes/Adult/Files/Takizawa Rola/ABP-013HDA.wmv | ||||
| /Volumes/Adult/Files/Uncensored Mosaic Removal Megapack/ADN-017(Asami Ogawa).mp4 | ||||
							
								
								
									
										12
									
								
								SiteSource/avsox.py → avsox.py
									
									
									
									
									
										
										
										Executable file → Normal file
									
								
							
							
						
						| @ -3,9 +3,6 @@ from lxml import etree | ||||
| import json | ||||
| from bs4 import BeautifulSoup | ||||
| from ADC_function import * | ||||
| # import sys | ||||
| # import io | ||||
| # sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True) | ||||
| 
 | ||||
| def getActorPhoto(htmlcode): #//*[@id="star_qdt"]/li/a/img | ||||
|     soup = BeautifulSoup(htmlcode, 'lxml') | ||||
| @ -74,17 +71,16 @@ def getTag(a):  # 获取演员 | ||||
|     return d | ||||
| 
 | ||||
| def main(number): | ||||
|     url = 'https://avsox.host/cn/search/' + number | ||||
|     a = get_html(url) | ||||
|     a = get_html('https://avsox.asia/cn/search/' + number) | ||||
|     html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text() | ||||
|     result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']") | ||||
|     if result1 == '' or result1 == 'null' or result1 == 'None': | ||||
|         a = get_html('https://avsox.host/cn/search/' + number.replace('-', '_')) | ||||
|         a = get_html('https://avsox.asia/cn/search/' + number.replace('-', '_')) | ||||
|         print(a) | ||||
|         html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text() | ||||
|         result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']") | ||||
|         if result1 == '' or result1 == 'null' or result1 == 'None': | ||||
|             a = get_html('https://avsox.host/cn/search/' + number.replace('_', '')) | ||||
|             a = get_html('https://avsox.asia/cn/search/' + number.replace('_', '')) | ||||
|             print(a) | ||||
|             html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text() | ||||
|             result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']") | ||||
| @ -113,4 +109,4 @@ def main(number): | ||||
|     js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )  # .encode('UTF-8') | ||||
|     return js | ||||
| 
 | ||||
| #print(main('012717_472')) | ||||
| #print(main('041516_541')) | ||||
							
								
								
									
										36
									
								
								config.ini
									
									
									
									
									
										
										
										Executable file → Normal file
									
								
							
							
						
						| @ -1,35 +1,23 @@ | ||||
| [common] | ||||
| main_mode=2 | ||||
| # 路径均为绝对路径,不要写入" '等符号 | ||||
| search_folder=          /Volumes/192.168.2.100/Adult/AVTest | ||||
| # 如果failed_output_folder 为空,抓取不到相关信息的视频将不回移动 | ||||
| failed_output_folder=   /Volumes/192.168.2.100/Adult/UnknownStars | ||||
| success_output_folder=  /Volumes/192.168.2.100/Adult/Files | ||||
| #临时资源存储路径,比如xxx.nfo 海报图 | ||||
| temp_folder=            /Volumes/192.168.2.100/Adult/temp | ||||
| # 如果是远程挂载的盘符,建议不开启创建软连接:软连接链接的是绝对路径,远程NAS上的路径和本地挂载的路径一般不同。 | ||||
| soft_link=0 | ||||
| main_mode=1 | ||||
| failed_output_folder=failed | ||||
| success_output_folder=JAV_output | ||||
| 
 | ||||
| [proxy] | ||||
| #例子为socks代理配置,可以 =后留空 | ||||
| proxy=      socks5h://127.0.0.1:1081 | ||||
| timeout=    10 | ||||
| retry=      5 | ||||
| proxy=127.0.0.1:1080 | ||||
| timeout=10 | ||||
| retry=3 | ||||
| 
 | ||||
| [Name_Rule] | ||||
| location_rule=  actor+'/'+number | ||||
| naming_rule=    number+'-'+title | ||||
| location_rule=actor+'/'+number | ||||
| naming_rule=number+'-'+title | ||||
| 
 | ||||
| [update] | ||||
| update_check=1 | ||||
| 
 | ||||
| [media] | ||||
| #emby or plex or kodi ,emby=jellyfin | ||||
| media_warehouse=EMBY | ||||
| media_warehouse=emby | ||||
| #emby or plex or kodi | ||||
| 
 | ||||
| [escape] | ||||
| literals=\() | ||||
| folders=/Volumes/Adult/UnknownStars,/Volumes/Adult/Stars | ||||
| 
 | ||||
| [debug_mode] | ||||
| switch=1 | ||||
| [directory_capture] | ||||
| directory= | ||||
|  | ||||
							
								
								
									
										75
									
								
								fc2fans_club.py
									
									
									
									
									
										Executable file
									
								
							
							
						
						| @ -0,0 +1,75 @@ | ||||
| import re | ||||
| from lxml import etree#need install | ||||
| import json | ||||
| import ADC_function | ||||
| 
 | ||||
def getTitle(htmlcode):
    """Extract the article title (the original comment mislabelled this as studio)."""
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    nodes = tree.xpath('//*[@id="container"]/div[1]/div/article/section[1]/h2/text()')
    return str(nodes).strip(" ['']")
def getActor(htmlcode):
    """Return the performer field, '' on any parse failure.

    NOTE(review): uses the same xpath as getStudio() below — confirm dd[5]
    really holds the actor on this page layout.
    """
    try:
        tree = etree.fromstring(htmlcode, etree.HTMLParser())
        nodes = tree.xpath('//*[@id="container"]/div[1]/div/article/section[1]/div/div[2]/dl/dd[5]/a/text()')
        return str(nodes).strip(" ['']")
    except Exception:
        # narrowed from a bare except
        return ''
def getStudio(htmlcode):
    """Return the maker/studio field, '' on any parse failure."""
    try:
        tree = etree.fromstring(htmlcode, etree.HTMLParser())
        nodes = tree.xpath('//*[@id="container"]/div[1]/div/article/section[1]/div/div[2]/dl/dd[5]/a/text()')
        return str(nodes).strip(" ['']")
    except Exception:
        # narrowed from a bare except
        return ''
def getNum(htmlcode):
    """Extract the product-id (番号) text from the page HTML."""
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    nodes = tree.xpath('/html/body/div[5]/div[1]/div[2]/p[1]/span[2]/text()')
    return str(nodes).strip(" ['']")
def getRelease(htmlcode2):
    """Extract the release-date text from an fc2 article page."""
    tree = etree.fromstring(htmlcode2, etree.HTMLParser())
    nodes = tree.xpath('//*[@id="container"]/div[1]/div/article/section[1]/div/div[2]/dl/dd[4]/text()')
    return str(nodes).strip(" ['']")
def getCover(htmlcode2):
    """Return the cover image URL (scheme prepended to the protocol-relative src)."""
    tree = etree.fromstring(htmlcode2, etree.HTMLParser())
    src = str(tree.xpath('//*[@id="container"]/div[1]/div/article/section[1]/div/div[1]/a/img/@src')).strip(" ['']")
    return 'http:' + src
def getOutline(htmlcode2):
    """Extract and lightly clean the description text of the article page."""
    tree = etree.fromstring(htmlcode2, etree.HTMLParser())
    text = str(tree.xpath('//*[@id="container"]/div[1]/div/article/section[4]/p/text()')).strip(" ['']")
    # identical cleanup chain as before: strip literal \n, quotes, repr noise
    text = text.replace("\\n", '', 10000).replace("'", '', 10000)
    return text.replace(', ,', '').strip('  ').replace('。,', ',')
def getTag(htmlcode):
    """Return the tag texts of the article as a list of strings."""
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    return tree.xpath('//*[@id="container"]/div[1]/div/article/section[6]/ul/li/a/text()')
def getYear(release):
    """Return the first 4-digit year found in *release*, or '' if none.

    Raw regex string avoids the invalid-escape warning; the bare except is
    narrowed to the actual failure mode (non-string input).
    """
    try:
        match = re.search(r'\d{4}', release)
        return match.group() if match else ''
    except TypeError:
        # release may be None when upstream parsing failed
        return ''
| 
 | ||||
def main(number):
    """Scrape metadata for FC2 article *number* from fc2.com's article_search
    page and return it serialised as a pretty-printed JSON string.
    """
    # BUG FIX: str.strip()/lstrip() treat their argument as a SET of
    # characters, so the previous chain of .strip('fc2-')/.lstrip('FC2-')
    # calls could also eat digits belonging to the id itself (e.g. the
    # trailing '2' of an id ending in 2).  Remove the FC2/PPV markers with a
    # case-insensitive regex instead.
    number = re.sub(r'(?i)(?:fc2|ppv)[-_]?', '', number).strip()
    htmlcode2 = ADC_function.get_html('http://adult.contents.fc2.com/article_search.php?id=' + number)
    #htmlcode = ADC_function.get_html('http://fc2fans.club/html/FC2-' + number + '.html')
    dic = {
        'title':    getTitle(htmlcode2),
        'studio':   getStudio(htmlcode2),
        'year':     getYear(getRelease(htmlcode2)),
        'outline':  getOutline(htmlcode2),
        # NOTE(review): a year is stored under 'runtime' — confirm intent
        'runtime':  getYear(getRelease(htmlcode2)),
        'director': getStudio(htmlcode2),
        # NOTE(review): studio reused as actor — confirm intent
        'actor':    getStudio(htmlcode2),
        'release':  getRelease(htmlcode2),
        'number':  'FC2-'+number,
        'cover':    getCover(htmlcode2),
        'imagecut': 0,
        'tag':      getTag(htmlcode2),
        'actor_photo':'',
        'website':  'http://adult.contents.fc2.com/article_search.php?id=' + number,
        'source': 'fc2fans_club.py',
    }
    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'),)#.encode('UTF-8')
    return js
| 
 | ||||
| #print(main('1145465')) | ||||
| @ -109,15 +109,14 @@ def main(number): | ||||
|     except: | ||||
|         return main_uncensored(number) | ||||
| 
 | ||||
| 
 | ||||
| def main_uncensored(number):  # 无码 | ||||
| def main_uncensored(number): | ||||
|     htmlcode = get_html('https://www.javbus.com/' + number) | ||||
|     dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", '')) | ||||
|     if getTitle(htmlcode) == '': | ||||
|         htmlcode = get_html('https://www.javbus.com/' + number.replace('-','_')) | ||||
|         dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", '')) | ||||
|     dic = { | ||||
|         'title': str(re.sub('\w+-\d+-', '', getTitle(htmlcode))).replace(getNum(htmlcode)+'-', ''), | ||||
|         'title': str(re.sub('\w+-\d+-','',getTitle(htmlcode))).replace(getNum(htmlcode)+'-',''), | ||||
|         'studio': getStudio(htmlcode), | ||||
|         'year': getYear(htmlcode), | ||||
|         'outline': getOutline(dww_htmlcode), | ||||
| @ -136,4 +135,3 @@ def main_uncensored(number):  # 无码 | ||||
|     } | ||||
|     js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )  # .encode('UTF-8') | ||||
|     return js | ||||
| 
 | ||||
							
								
								
									
										139
									
								
								javdb.py
									
									
									
									
									
										Executable file
									
								
							
							
						
						| @ -0,0 +1,139 @@ | ||||
| import re | ||||
| from lxml import etree | ||||
| import json | ||||
| from bs4 import BeautifulSoup | ||||
| from ADC_function import * | ||||
| 
 | ||||
def getTitle(a):
    """Return the work's title scraped from a javdb detail page.

    a -- HTML text of the full detail page.
    Returns the cleaned title, or '' when the page cannot be parsed.
    """
    try:
        html = etree.fromstring(a, etree.HTMLParser())
        result = str(html.xpath('/html/body/section/div/h2/strong/text()')).strip(" ['']")
        # Drop a leading "[...] " tag block and normalise slashes/NBSP residue.
        return re.sub(r'.*\] ', '', result.replace('/', ',').replace('\\xa0', '').replace(' : ', ''))
    except Exception:
        # The original fallback re-used `result`, which is unbound when the
        # parse itself failed (NameError). Return '' like the sibling
        # scrapers in this project do on a failed title lookup.
        return ''
def getActor(a):
    """Return a ', '-joined actor list scraped from the info-panel HTML."""
    tree = etree.fromstring(a, etree.HTMLParser())
    # Plain-text and hyperlinked names live under sibling <span> nodes.
    base = '//strong[contains(text(),"演員")]/../following-sibling::span'
    plain = str(tree.xpath(base + '/text()')).strip(" ['']")
    linked = str(tree.xpath(base + '/a/text()')).strip(" ['']")
    merged = (plain + linked).strip('+')
    # Scrub str(list) artefacts and NBSP escapes, then normalise separators.
    merged = merged.replace(",\\xa0", "").replace("'", "").replace(' ', '')
    return merged.replace(',,', '').lstrip(',').replace(',', ', ')
def getStudio(a):
    """Return the production studio scraped from the info-panel HTML."""
    tree = etree.fromstring(a, etree.HTMLParser())
    base = '//strong[contains(text(),"製作")]/../following-sibling::span'
    plain = str(tree.xpath(base + '/text()')).strip(" ['']")
    linked = str(tree.xpath(base + '/a/text()')).strip(" ['']")
    return (plain + linked).strip('+').replace("', '", '').replace('"', '')
def getRuntime(a):
    """Return the runtime text scraped from the info-panel HTML."""
    tree = etree.fromstring(a, etree.HTMLParser())
    base = '//strong[contains(text(),"時長")]/../following-sibling::span'
    plain = str(tree.xpath(base + '/text()')).strip(" ['']")
    linked = str(tree.xpath(base + '/a/text()')).strip(" ['']")
    # NOTE(review): rstrip('mi') strips any trailing run of 'm'/'i'
    # characters, not the literal suffix "mi" — preserved as-is.
    return (plain + linked).strip('+').rstrip('mi')
def getLabel(a):
    """Return the series/label field scraped from the info-panel HTML."""
    tree = etree.fromstring(a, etree.HTMLParser())
    base = '//strong[contains(text(),"系列")]/../following-sibling::span'
    plain = str(tree.xpath(base + '/text()')).strip(" ['']")
    linked = str(tree.xpath(base + '/a/text()')).strip(" ['']")
    return (plain + linked).strip('+').replace("', '", '').replace('"', '')
def getNum(a):
    """Return the catalogue number scraped from the info-panel HTML."""
    tree = etree.fromstring(a, etree.HTMLParser())
    base = '//strong[contains(text(),"番號")]/../following-sibling::span'
    plain = str(tree.xpath(base + '/text()')).strip(" ['']")
    linked = str(tree.xpath(base + '/a/text()')).strip(" ['']")
    return (plain + linked).strip('+')
def getYear(release):
    """Return the 4-digit year found in *release*, or the input unchanged.

    release -- a release-date string such as '2019-06-15'. Non-string
    input (e.g. None) is returned unchanged, matching the old behaviour.

    NOTE: the parameter was previously named ``getRelease``, shadowing the
    module-level function of that name; renamed (in-file callers pass it
    positionally).
    """
    try:
        match = re.search(r'\d{4}', release)
    except TypeError:
        # Non-string input: the original bare except fell through to
        # returning the argument as-is.
        return release
    return match.group() if match else release
def getRelease(a):
    """Return the release-date text scraped from the info-panel HTML."""
    tree = etree.fromstring(a, etree.HTMLParser())
    base = '//strong[contains(text(),"時間")]/../following-sibling::span'
    plain = str(tree.xpath(base + '/text()')).strip(" ['']")
    linked = str(tree.xpath(base + '/a/text()')).strip(" ['']")
    return (plain + linked).strip('+')
def getTag(a):
    """Return the comma-separated genre tags scraped from the panel HTML."""
    tree = etree.fromstring(a, etree.HTMLParser())
    base = '//strong[contains(text(),"类别")]/../following-sibling::span'
    plain = str(tree.xpath(base + '/text()')).strip(" ['']")
    linked = str(tree.xpath(base + '/a/text()')).strip(" ['']")
    merged = (plain + linked).strip('+')
    # Scrub str(list) artefacts and NBSP escapes left by the xpath dump.
    merged = merged.replace(",\\xa0", "").replace("'", "").replace(' ', '')
    return merged.replace(',,', '').lstrip(',')
def getCover(htmlcode):
    """Return the cover image URL; two page layouts are probed in turn."""
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    for xp in ('/html/body/section/div/div[2]/div[1]/a/img/@src',
               '/html/body/section/div/div[3]/div[1]/a/img/@src'):
        src = str(tree.xpath(xp)).strip(" ['']")
        if src:
            return src
    return ''
def getDirector(a):
    """Return the director name scraped from the info-panel HTML."""
    tree = etree.fromstring(a, etree.HTMLParser())
    base = '//strong[contains(text(),"導演")]/../following-sibling::span'
    plain = str(tree.xpath(base + '/text()')).strip(" ['']")
    linked = str(tree.xpath(base + '/a/text()')).strip(" ['']")
    return (plain + linked).strip('+').replace("', '", '').replace('"', '')
def getOutline(htmlcode):
    """Return the synopsis paragraph from the page's #introduction block."""
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    return str(tree.xpath('//*[@id="introduction"]/dd/p[1]/text()')).strip(" ['']")
def _find_detail_path(number):
    """Search javdb.com for *number*; return the relative detail-page href.

    Falls back to a '-'→'_' variant of the number when the first search
    yields nothing ('' or the literal string 'null').
    """
    page = get_html('https://javdb.com/search?q=' + number + '&f=all')
    tree = etree.fromstring(page, etree.HTMLParser())
    path = str(tree.xpath('//*[@id="videos"]/div/div/a/@href')).strip(" ['']")
    if path == '' or path == 'null':
        page = get_html('https://javdb.com/search?q=' + number.replace('-', '_') + '&f=all')
        tree = etree.fromstring(page, etree.HTMLParser())
        path = str(tree.xpath('//*[@id="videos"]/div/div/a/@href')).strip(" ['']")
    return path


def _scrape(domain, number):
    """Fetch *number*'s detail page on *domain*; return the metadata JSON."""
    path = _find_detail_path(number)
    detail = get_html(domain + path)
    soup = BeautifulSoup(detail, 'lxml')
    panel = str(soup.find(attrs={'class': 'panel'}))
    actor = getActor(panel)
    num = getNum(panel)
    dic = {
        'actor': actor,
        # Strip escaped newlines, indentation runs, the actor/number echoes
        # and the censorship tags that javdb prepends to the title text.
        'title': getTitle(detail).replace("\\n", '').replace('        ', '')
            .replace(actor, '').replace(num, '')
            .replace('无码', '').replace('有码', '').lstrip(' '),
        'studio': getStudio(panel),
        'outline': getOutline(panel),
        'runtime': getRuntime(panel),
        'director': getDirector(panel),
        'release': getRelease(panel),
        'number': num,
        'cover': getCover(detail),
        'imagecut': 0,
        'tag': getTag(panel),
        'label': getLabel(panel),
        'year': getYear(getRelease(panel)),
        'actor_photo': '',
        'website': domain + path,
        'source': 'javdb.py',
    }
    return json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4,
                      separators=(',', ':'))


def main(number):
    """Return scraped metadata for *number* as a pretty-printed JSON string.

    number -- catalogue number to look up, e.g. '061519-861'.

    The javdb1.com mirror is tried first for the detail page; on any
    failure we retry against javdb.com. (The original duplicated the whole
    scraping body inside a bare ``except:``; the two copies differed only
    in the detail-page domain and are now deduplicated into _scrape /
    _find_detail_path, with the ''/'null' empty-search check applied to
    both attempts.)
    """
    try:
        return _scrape('https://javdb1.com', number)
    except Exception:
        return _scrape('https://javdb.com', number)
| 
 | ||||
| #print(main('061519-861')) | ||||
							
								
								
									
										0
									
								
								readme/This is readms.md's images folder
									
									
									
									
									
										
										
										Executable file → Normal file
									
								
							
							
						
						
							
								
								
									
										0
									
								
								readme/flow_chart2.png
									
									
									
									
									
										
										
										Executable file → Normal file
									
								
							
							
						
						| Before Width: | Height: | Size: 101 KiB After Width: | Height: | Size: 101 KiB | 
							
								
								
									
										0
									
								
								readme/readme1.PNG
									
									
									
									
									
										
										
										Executable file → Normal file
									
								
							
							
						
						| Before Width: | Height: | Size: 1.1 KiB After Width: | Height: | Size: 1.1 KiB | 
							
								
								
									
										0
									
								
								readme/readme2.PNG
									
									
									
									
									
										
										
										Executable file → Normal file
									
								
							
							
						
						| Before Width: | Height: | Size: 3.4 KiB After Width: | Height: | Size: 3.4 KiB | 
							
								
								
									
										0
									
								
								readme/readme3.PNG
									
									
									
									
									
										
										
										Executable file → Normal file
									
								
							
							
						
						| Before Width: | Height: | Size: 1.3 KiB After Width: | Height: | Size: 1.3 KiB | 
							
								
								
									
										0
									
								
								readme/readme4.PNG
									
									
									
									
									
										
										
										Executable file → Normal file
									
								
							
							
						
						| Before Width: | Height: | Size: 16 KiB After Width: | Height: | Size: 16 KiB | 
							
								
								
									
										
											BIN
										
									
								
								readme/readme5.png
									
									
									
									
									
										Normal file
									
								
							
							
						
						| After Width: | Height: | Size: 457 KiB | 
							
								
								
									
										0
									
								
								readme/single.gif
									
									
									
									
									
										
										
										Executable file → Normal file
									
								
							
							
						
						| Before Width: | Height: | Size: 68 KiB After Width: | Height: | Size: 68 KiB | 
| @ -1 +0,0 @@ | ||||
| 1 | ||||
| Before Width: | Height: | Size: 101 KiB | 
| Before Width: | Height: | Size: 1.1 KiB | 
| Before Width: | Height: | Size: 3.4 KiB | 
| Before Width: | Height: | Size: 1.3 KiB | 
| Before Width: | Height: | Size: 16 KiB | 
| @ -1 +0,0 @@ | ||||
| pipenv install -rlxml bs4 pillow pyquery | ||||
| Before Width: | Height: | Size: 68 KiB | 
| @ -3,14 +3,11 @@ from lxml import etree | ||||
| import json | ||||
| from bs4 import BeautifulSoup | ||||
| from ADC_function import * | ||||
| # import sys | ||||
| # import io | ||||
| # sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True) | ||||
| 
 | ||||
| def getTitle(a): | ||||
|     try: | ||||
|         html = etree.fromstring(a, etree.HTMLParser()) | ||||
|         result = str(html.xpath('//*[@id="center_column"]/div[1]/h1/text()')).strip(" ['']") | ||||
|         result = str(html.xpath('//*[@id="center_column"]/div[2]/h1/text()')).strip(" ['']") | ||||
|         return result.replace('/', ',') | ||||
|     except: | ||||
|         return '' | ||||
| @ -65,8 +62,7 @@ def getTag(a): | ||||
|     return str(result1 + result2).strip('+').replace("', '\\n",",").replace("', '","").replace('"','') | ||||
| def getCover(htmlcode): | ||||
|     html = etree.fromstring(htmlcode, etree.HTMLParser()) | ||||
|     result = str(html.xpath('//*[@id="center_column"]/div[1]/div[1]/div/div/h2/img/@src')).strip(" ['']") | ||||
|     #                    /html/body/div[2]/article[2]/div[1]/div[1]/div/div/h2/img/@src | ||||
|     result = str(html.xpath('//*[@id="center_column"]/div[2]/div[1]/div/div/h2/img/@src')).strip(" ['']") | ||||
|     return result | ||||
| def getDirector(a): | ||||
|     html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text() | ||||
| @ -81,7 +77,7 @@ def getOutline(htmlcode): | ||||
|     return result | ||||
| def main(number2): | ||||
|     number=number2.upper() | ||||
|     htmlcode=str(get_html('https://www.mgstage.com/product/product_detail/'+str(number)+'/',cookies={'adc':'1'})) | ||||
|     htmlcode=get_html('https://www.mgstage.com/product/product_detail/'+str(number)+'/',cookies={'adc':'1'}) | ||||
|     soup = BeautifulSoup(htmlcode, 'lxml') | ||||
|     a = str(soup.find(attrs={'class': 'detail_data'})).replace('\n                                        ','').replace('                                ','').replace('\n                            ','').replace('\n                        ','') | ||||
|     dic = { | ||||
| @ -100,9 +96,9 @@ def main(number2): | ||||
|         'year': getYear(getRelease(a)),  # str(re.search('\d{4}',getRelease(a)).group()), | ||||
|         'actor_photo': '', | ||||
|         'website':'https://www.mgstage.com/product/product_detail/'+str(number)+'/', | ||||
|         'source': 'mgstage.py', | ||||
|         'source': 'siro.py', | ||||
|     } | ||||
|     js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )  # .encode('UTF-8') | ||||
|     js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'),)#.encode('UTF-8') | ||||
|     return js | ||||
| 
 | ||||
| #print(main('SIRO-3607')) | ||||
| #print(main('300maan-373')) | ||||
							
								
								
									
										80
									
								
								test.py
									
									
									
									
									
								
							
							
						
						| @ -1,80 +0,0 @@ | ||||
| import os | ||||
| import re | ||||
| from itertools import groupby | ||||
| 
 | ||||
| import fuckit as fuckit | ||||
| import pandas as pd | ||||
| from tenacity import retry, stop_after_delay, wait_fixed | ||||
| 
 | ||||
| 
 | ||||
def go():
    """Demo: show that an itertools.groupby iterator is single-pass."""
    values = [1, 2, 3, 4, 5, 6]
    grouped = groupby(values, key=lambda item: item / 2)
    # The first pass consumes the iterator ...
    for pair in grouped:
        print(pair)
    # ... so this second pass prints nothing.
    for pair in grouped:
        print(pair)
| 
 | ||||
| 
 | ||||
class TryDo:
    """Iterator wrapper that invokes *func*, allowing up to *times* attempts.

    NOTE(review): as written, __next__ raises StopIteration(True) after the
    very first successful call, so iteration never actually retries —
    behaviour preserved verbatim here.
    """

    def __init__(self, func, times=3):
        self.tries = times
        self.func = func

    def __iter__(self):
        # Reset the attempt counter at the start of each iteration.
        self.currentTry = 1
        return self

    def __next__(self):
        if self.currentTry > self.tries:
            # Out of attempts: signal failure via the StopIteration payload.
            raise StopIteration(False)
        self.currentTry += 1
        self.func()
        # Signal success via the StopIteration payload.
        raise StopIteration(True)
| 
 | ||||
| 
 | ||||
# Demo of tenacity's retry decorator: the body always raises, so tenacity
# keeps retrying every 2 s until the 3 s stop deadline elapses, then
# re-raises the last exception.
# NOTE(review): the name and message say 10 seconds but stop_after_delay(3)
# gives up after ~3 s — presumably a stale copy-paste; confirm intent.
@retry(stop=stop_after_delay(3), wait=wait_fixed(2))
def stop_after_10_s():
    print("Stopping after 10 seconds")
    raise Exception
| 
 | ||||
| 
 | ||||
| # f = iter( TryDo(do_something, 5)) | ||||
| 
 | ||||
| # stop_after_10_s() | ||||
def errorfunc():
    """Always raise a bare Exception (failing-operation stand-in for demos)."""
    raise Exception
| 
 | ||||
| 
 | ||||
def okfunc():
    """Print 'ok' (succeeding-operation stand-in for demos)."""
    print("ok")
| 
 | ||||
| 
 | ||||
| # with fuckit: | ||||
| #     errorfunc() | ||||
| #     okfunc() | ||||
| # re.match() | ||||
| 
 | ||||
# Demo 1: lookbehind anchored after "999" with named-group alternation —
# captures either a single letter part-suffix (`alpha`) or a single digit
# (`num`) following the number in the filename.
r = re.search(r'(?<=999)-?((?P<alpha>([A-Z](?![A-Z])))|(?P<num>\d(?!\d)))', "IPTD-999-B-彼女の姉貴とイケナイ関係-RIO", re.I)
#
print(r.groupdict())
print(r.groupdict()['alpha'])
print(r.group(2))
# NOTE(review): redundant — `re` is already imported at the top of the file.
import re

# Demo 2: lookbehind plus greedy vs. lazy capture groups over a sentence.
line = "Cats are smarter than dogs"
matchObj = re.search(r'(?<=a)(.*) are (.*?) .*', line, re.M | re.I)
if matchObj:
    print("matchObj.group() : ", matchObj.group())
    print("matchObj.group(1) : ", matchObj.group(1))
    print("matchObj.group(2) : ", matchObj.group(2))
else:
    print("No match!!")

# print(r[-1])
# print(newList)
							
								
								
									
										6
									
								
								update_check.json
									
									
									
									
									
										
										
										Executable file → Normal file
									
								
							
							
						
						| @ -1,5 +1,5 @@ | ||||
| { | ||||
| 	"version": "2.8.2", | ||||
| 	"version_show":"2.8.2", | ||||
| 	"download": "https://github.com/yoshiko2/AV_Data_Capture/releases" | ||||
| 	"version": "1.3", | ||||
| 	"version_show":"1.3", | ||||
| 	"download": "https://github.com/wenead99/AV_Data_Capture/releases" | ||||
| } | ||||