Compare commits

...

85 Commits

Author SHA1 Message Date
fc13f88731 Improve the regexes etc.; adjust the logic to avoid results being overwritten
to learn groupby

learn pandas groupby

groupby

learn pandas groupby

Improve regex extraction of the code and episode number

Image-download logic still to be understood

Plus the cropping + background-image logic

Change all config[

Reorganize the nfo-generating code

Code info, thumbnails and posters can now be cached

Episode numbers after the code and at the end of the name are recognized; -C Chinese-subtitle releases cannot be told apart yet

Fix a mistake

Store the data in nested dictionaries

Tidy up the functions

Fix the date-matching regex

Add dependencies via pipenv

Change the prefer-three-digit-numbers rule: HEYZO's four digits excepted

Added dependencies and improvements for files with a code

I can't remember what I changed
2022-10-09 20:47:38 +08:00
Yoshiko
32a19bb989
Merge pull request #163 from 68cdrBxM8YdoJ/master
Add support jav321
2020-03-21 21:44:40 +08:00
68cdrBxM8YdoJ
9a530f4e46 Add support jav321 2020-03-20 14:04:00 +09:00
Yoshiko
780e47ffba
Merge pull request #158 from oweaF/master
Create ruquirments.txt
2020-03-16 15:37:13 +08:00
oweaF
b31f27de97 Create ruquirments.txt 2020-03-13 11:54:50 +08:00
Yoshiko
0f720acd8a
Update 2.8.2 2020-03-13 01:19:07 +08:00
Yoshiko
09cf8206a9
Update README.md 2020-03-13 01:07:01 +08:00
Yoshiko
9120937398
Merge pull request #155 from jnozsc/eol_lf
use LF instead of CR+LF for all python files
2020-03-13 00:57:01 +08:00
Yoshiko
7a66695eea
Update 2.8 2020-03-13 00:54:02 +08:00
jnozsc
8d60cdbb30 use LF instead of CR+LF for all python files 2020-03-09 14:50:36 -07:00
Yoshiko
c22863ece4
Update 2.8 2020-03-08 20:38:25 +08:00
Yoshiko
73cdd797c5
Merge pull request #152 from halo9999/fix-matching-number
Fix matching chinese or japanese as number
2020-03-08 20:27:45 +08:00
Yoshiko
7eec310929
Merge pull request #151 from jnozsc/version_2.7
bump version to 2.7
2020-03-08 20:27:13 +08:00
Yoshiko
aeebfc753b
Merge pull request #148 from jnozsc/fix_fanza_getCover
tweak fanza getCover()
2020-03-08 20:26:45 +08:00
Yoshiko
aafb493a17
Merge pull request #147 from halo9999/bug-fix
use failed_output_folder in config instead of hard-coding
2020-03-08 20:24:21 +08:00
halo9999
9d87d9769d Fix matching chinese or japanese as number 2020-03-07 20:15:30 +09:00
jnozsc
8b36cfb35c bump version to 2.7 2020-03-05 09:38:02 -08:00
jnozsc
3b85ebfa51 tweak fanza getCover() 2020-03-04 15:25:33 -08:00
halo9999
f415b4664a use failed_output_folder in config instead of hard-coding 2020-03-05 03:12:40 +09:00
Yoshiko
bedd76bc60
Update 2.7 2020-03-05 01:40:50 +08:00
Yoshiko
88075d7dd8
Merge pull request #134 from vicnoah/master
Fix WebM file suffixes not being matched
2020-02-25 21:37:49 +08:00
vicnoah
4b59f94e75 Fix the suffix matching problem of WebM file 2020-02-22 14:58:06 +08:00
Yoshiko
6b4e501180
Update README.md 2020-02-18 22:59:27 +08:00
Yoshiko
6a1af89596
Merge pull request #124 from jnozsc/fix_issue_119
fix #119
2020-02-18 15:11:38 +08:00
Yoshiko
a9fb890639
Merge pull request #130 from jnozsc/refactor-search-logic
refactor search logic
2020-02-18 15:11:24 +08:00
Yoshiko
57cdd79003
Merge pull request #129 from jnozsc/fix_javdb
fix javdb issue when it returns multiple results
2020-02-18 15:11:17 +08:00
Yoshiko
a989382888
Merge pull request #128 from jnozsc/fix_fc2fans_club
add a try catch logic for fc2fans_club.py
2020-02-18 15:11:08 +08:00
Yoshiko
4ca2d957a3
Merge pull request #126 from jnozsc/rewrite_fanza
rewrite fanza.py
2020-02-18 15:10:59 +08:00
Yoshiko
706d920d65
Merge pull request #123 from jnozsc/remove_core2
remove core2.py
2020-02-18 15:10:41 +08:00
jnozsc
a4c8bcf2b4 refactor search logic 2020-02-17 22:09:10 -08:00
jnozsc
06de0232a1 typo 2020-02-17 21:51:09 -08:00
jnozsc
8dc9be12cc fix javdb issue when it returns multiple results 2020-02-17 21:50:34 -08:00
jnozsc
53fe85e607 add a try catch logic for fc2fans_club.py 2020-02-17 21:45:13 -08:00
jnozsc
5f46f3f25d rewrite fanza.py 2020-02-17 10:47:11 -08:00
jnozsc
229066fa99 fix #119 2020-02-17 09:24:38 -08:00
jnozsc
6b6c884c47 remove core2.py 2020-02-17 09:16:39 -08:00
Yoshiko
690557f878
Update README.md 2020-02-18 00:18:00 +08:00
Yoshiko
7d59456597
2.7 Update (future)
2.7 Update (future)
2020-02-17 16:00:02 +08:00
Yoshiko
fb9c8201f5
2.7 Update (future)
2.7 Update (future)
2020-02-17 15:59:49 +08:00
Yoshiko
d647ddfe07
Merge pull request #122 from jnozsc/fix-fanza-hinban-issue
2.7 Update (future)
2020-02-17 15:59:36 +08:00
jnozsc
dabe1f2da6
add edge case 2020-02-16 15:00:29 -08:00
jnozsc
8dda8da2b3
fix fanza hinban issue 2020-02-16 14:45:40 -08:00
jnozsc
064d8a8349 refactor config proxy 2020-02-16 13:20:10 -08:00
jnozsc
847e79c6a0 typo 2020-02-16 11:17:41 -08:00
jnozsc
b628e11811 fix typo 2020-02-16 11:11:51 -08:00
jnozsc
440577b943 change code block 2020-02-16 11:06:09 -08:00
jnozsc
65741bc5cb add 2 space 2020-02-16 11:05:20 -08:00
jnozsc
fd0c18a220 fix docs 2020-02-16 11:03:48 -08:00
jnozsc
b5de3942ae fix more doc 2020-02-16 10:56:23 -08:00
jnozsc
d6d6fc5a95 fix more typo 2020-02-16 10:48:25 -08:00
jnozsc
8436a3871c fix more doc 2020-02-16 10:45:46 -08:00
jnozsc
202673dd32 fix more links 2020-02-16 10:38:19 -08:00
jnozsc
5fdda13320 fix markdown format and content 2020-02-16 10:33:57 -08:00
Yoshiko
e0d2058fa0
Update 2.6 2020-02-14 18:46:11 +08:00
Yoshiko
dd3d394d58
Merge pull request #103 from vicnoah/master
add WebM support
2020-02-12 17:19:15 +08:00
Yoshiko
6e5831d7d6
Update README.md 2020-02-09 17:48:34 +08:00
Yoshiko
ea54a149a8
Update README.md 2020-02-07 19:50:49 +08:00
Yoshiko
9e7c798cd1
Update README.md 2020-02-07 19:50:01 +08:00
Yoshiko
b4e2530b6f
Update README.md 2020-02-07 17:53:52 +08:00
Yoshiko
6b5af440f1
Update README.md 2020-02-07 17:53:22 +08:00
Yoshiko
c362e7a4d7
Update README.md 2020-02-07 17:53:02 +08:00
vicnoah
068dc86480 WebM support 2020-02-05 21:26:15 +08:00
Yoshiko
87db5b5426
Update 2.5 2020-02-04 01:10:10 +08:00
Yoshiko
9b6cd74caa
Update 2.5 2020-02-04 01:09:12 +08:00
Yoshiko
6cbfd2ab0e
Update 2.5 2020-02-04 01:04:03 +08:00
Yoshiko
a46391c6b2
Update 2.5 2020-02-04 01:02:51 +08:00
Yoshiko
2c2867e3c6
Update README.md 2020-02-01 15:59:23 +08:00
Yoshiko
9102c28247
Update update_check.json 2020-02-01 03:32:37 +08:00
Yoshiko
05d1ac50c1
Update 2.4 2020-01-31 18:01:01 +08:00
Yoshiko
7d5da45567
Update 2.4 2020-01-31 17:47:40 +08:00
Yoshiko
03a4669e48
Merge pull request #85 from moyy996/master
Update 2.4, by moyy996
2020-01-29 18:44:48 +08:00
mo_yy
123b2f4cfc 2.3 - fix an out-of-range bug 2020-01-29 14:29:13 +08:00
mo_yy
cab36be6a2 2.3 - add directory exclusion 2020-01-29 14:28:30 +08:00
mo_yy
4660b1cdf2 2.3 - turn all global variables into parameters 2020-01-29 14:27:37 +08:00
mo_yy
8190da3d3e
Exclude specified directories 2020-01-29 13:24:45 +08:00
mo_yy
5105051f53
Exclude specified directories 2020-01-29 13:21:42 +08:00
Yoshiko
de1c9231ad
Update 2.3 2020-01-27 12:58:57 +08:00
Yoshiko
fe72afc1cf
Update 2.3 2020-01-27 01:28:59 +08:00
Yoshiko
70f1b16b3c
Update 2.2 2020-01-22 02:19:40 +08:00
Yoshiko
0b5d68732f
Merge pull request #77 from moyy996/master
Update 2.2
2020-01-21 14:20:13 +08:00
Yoshiko
4131b00965
Update 2.1 2020-01-19 17:59:52 +08:00
Yoshiko
6f6ed31b31
Update 2.1 2020-01-19 17:30:35 +08:00
Yoshiko
09006865b9
Update README.md 2020-01-19 02:24:51 +08:00
Yoshiko
c3519a2db2
Update README.md 2020-01-19 02:21:02 +08:00
Yoshiko
b21d47c55c
Update 2.0 2020-01-19 02:16:14 +08:00
54 changed files with 3333 additions and 1695 deletions

1
.gitattributes vendored Normal file

@@ -0,0 +1 @@
*.py text=auto eol=lf

1
.gitignore vendored

@@ -1,3 +1,4 @@
*.DS_Store
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]

2
.idea/.gitignore generated vendored Normal file

@@ -0,0 +1,2 @@
# Default ignored files
/workspace.xml

8
.idea/AV_Data_Capture.iml generated Normal file

@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
  <component name="NewModuleRootManager">
    <content url="file://$MODULE_DIR$" />
    <orderEntry type="jdk" jdkName="Python 3.8 (AV_Data_Capture)" jdkType="Python SDK" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
</module>

19
.idea/dictionaries/tanpengsccd.xml generated Normal file

@@ -0,0 +1,19 @@
<component name="ProjectDictionaryState">
  <dictionary name="tanpengsccd">
    <words>
      <w>avsox</w>
      <w>emby</w>
      <w>fanart</w>
      <w>fanza</w>
      <w>javbus</w>
      <w>javdb</w>
      <w>jellyfin</w>
      <w>khtml</w>
      <w>kodi</w>
      <w>mgstage</w>
      <w>plex</w>
      <w>pondo</w>
      <w>rmvb</w>
    </words>
  </dictionary>
</component>

6
.idea/inspectionProfiles/profiles_settings.xml generated Normal file

@@ -0,0 +1,6 @@
<component name="InspectionProjectProfileManager">
  <settings>
    <option name="USE_PROJECT_PROFILE" value="false" />
    <version value="1.0" />
  </settings>
</component>

7
.idea/misc.xml generated Normal file

@@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="JavaScriptSettings">
    <option name="languageLevel" value="ES6" />
  </component>
  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.8 (AV_Data_Capture)" project-jdk-type="Python SDK" />
</project>

8
.idea/modules.xml generated Normal file

@@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectModuleManager">
    <modules>
      <module fileurl="file://$PROJECT_DIR$/.idea/AV_Data_Capture.iml" filepath="$PROJECT_DIR$/.idea/AV_Data_Capture.iml" />
    </modules>
  </component>
</project>

6
.idea/other.xml generated Normal file

@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="PySciProjectComponent">
    <option name="PY_SCI_VIEW_SUGGESTED" value="true" />
  </component>
</project>

6
.idea/vcs.xml generated Normal file

@@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="VcsDirectoryMappings">
    <mapping directory="$PROJECT_DIR$" vcs="Git" />
  </component>
</project>

ADC_function.py

@@ -1,115 +1,127 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import requests
from configparser import ConfigParser
import os
import re
import time
import sys
from lxml import etree
import sys
import io
from ConfigApp import ConfigApp
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
# sys.setdefaultencoding('utf-8')

# config_file='config.ini'
# config = ConfigParser()
#
# if os.path.exists(config_file):
#     try:
#         config.read(config_file, encoding='UTF-8')
#     except:
#         print('[-]Config.ini read failed! Please use the offical file!')
# else:
#     print('[+]config.ini: not found, creating...',end='')
#     with open("config.ini", "wt", encoding='UTF-8') as code:
#         print("[common]", file=code)
#         print("main_mode = 1", file=code)
#         print("failed_output_folder = failed", file=code)
#         print("success_output_folder = JAV_output", file=code)
#         print("", file=code)
#         print("[proxy]",file=code)
#         print("proxy=127.0.0.1:1081",file=code)
#         print("timeout=10", file=code)
#         print("retry=3", file=code)
#         print("", file=code)
#         print("[Name_Rule]", file=code)
#         print("location_rule=actor+'/'+number",file=code)
#         print("naming_rule=number+'-'+title",file=code)
#         print("", file=code)
#         print("[update]",file=code)
#         print("update_check=1",file=code)
#         print("", file=code)
#         print("[media]", file=code)
#         print("media_warehouse=emby", file=code)
#         print("#emby plex kodi", file=code)
#         print("", file=code)
#         print("[escape]", file=code)
#         print("literals=\\", file=code)
#         print("", file=code)
#         print("[movie_location]", file=code)
#         print("path=", file=code)
#         print("", file=code)
#     print('.',end='')
#     time.sleep(2)
#     print('.')
#     print('[+]config.ini: created!')
#     print('[+]Please restart the program!')
#     time.sleep(4)
#     os._exit(0)
# try:
#     config.read(config_file, encoding='UTF-8')
# except:
#     print('[-]Config.ini read failed! Please use the offical file!')

config = ConfigApp()


def get_network_settings():
    try:
        proxy = config.proxy
        timeout = int(config.timeout)
        retry_count = int(config.retry)
        assert timeout > 0
        assert retry_count > 0
    except:
        raise ValueError("[-]Proxy config error! Please check the config.")
    return proxy, timeout, retry_count


def getDataState(json_data):  # detect whether fetching the metadata failed
    if json_data['title'] == '' or json_data['title'] == 'None' or json_data['title'] == 'null':
        return 0
    else:
        return 1


def ReadMediaWarehouse():
    return config.media_server


def UpdateCheckSwitch():
    check = str(config.update_check)
    if check == '1':
        return '1'
    elif check == '0':
        return '0'
    elif check == '':
        return '0'


def getXpathSingle(htmlcode, xpath):
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    result1 = str(html.xpath(xpath)).strip(" ['']")
    return result1


def get_html(url, cookies=None):  # core web-request helper
    proxy, timeout, retry_count = get_network_settings()
    i = 0
    print(url)
    while i < retry_count:
        try:
            if not proxy == '':
                proxies = {"http": proxy, "https": proxy}
                headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3100.0 Safari/537.36'}
                getweb = requests.get(str(url), headers=headers, timeout=timeout, proxies=proxies, cookies=cookies)
                getweb.encoding = 'utf-8'
                return getweb.text
            else:
                headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'}
                getweb = requests.get(str(url), headers=headers, timeout=timeout, cookies=cookies)
                getweb.encoding = 'utf-8'
                return getweb.text
        except Exception as e:
            print(e)
            i += 1
            print('[-]Connect retry ' + str(i) + '/' + str(retry_count))
    print('[-]Connect Failed! Please check your Proxy or Network!')
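
For context, a minimal sketch of how a scraper module would call this helper, assuming this file is ADC_function.py as in the upstream repository and that config.ini is populated; the URL is a placeholder:

```python
from ADC_function import get_html

# get_html returns the page text on success; after retry_count failed
# attempts the loop falls through and the function returns None.
html = get_html('https://example.com/movie/ABC-123')
if html is None:
    print('giving up: check proxy or network')
```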

AV_Data_Capture.py

@@ -1,164 +1,416 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import glob
import os
import re
import time
import fuckit
from tenacity import retry, stop_after_delay, wait_fixed
import json
import shutil
import itertools
import argparse
from pathlib import Path
from core import *
from ConfigApp import ConfigApp
from PathNameProcessor import PathNameProcessor

# TODO: encapsulate, aggregate and decouple CORE
# TODO: (learning) adopt a unified dependency-management tool
# TODO: converge on one metadata format (nfo, posters, ...) compatible with the different media servers: Emby, Jellyfin, Plex
# TODO: subtitle organizing: read every subtitle in the folders, extract its code, and drop it into the matching TEMP cache folder
config = ConfigApp()


def safe_list_get(list_in, idx, default=None):
    """
    Safe list indexing
    :param list_in:
    :param idx:
    :param default:
    :return:
    """
    try:
        return list_in[idx]
    except IndexError:
        return default


def UpdateCheck(version):
    if UpdateCheckSwitch() == '1':
        html2 = get_html('https://raw.githubusercontent.com/yoshiko2/AV_Data_Capture/master/update_check.json')
        html = json.loads(str(html2))

        if not version == html['version']:
            print('[*] * New update ' + html['version'] + ' *')
            print('[*] ↓ Download ↓')
            print('[*] ' + html['download'])
            print('[*]======================================================')
    else:
        print('[+]Update Check disabled!')


def argparse_get_file():
    parser = argparse.ArgumentParser()
    parser.add_argument("file", default='', nargs='?', help="Write the file path on here")
    args = parser.parse_args()
    if args.file == '':
        return ''
    else:
        return args.file


def movie_lists(escape_folders):
    escape_folders = re.split('[,]', escape_folders)
    total = []

    for root, dirs, files in os.walk(config.search_folder):
        if root in escape_folders:
            continue
        for file in files:
            if re.search(PathNameProcessor.pattern_of_file_name_suffixes, file, re.IGNORECASE):
                path = os.path.join(root, file)
                total.append(path)
    return total


# def CEF(path):
#     try:
#         files = os.listdir(path)  # list the sub-files/folders under the path
#         for file in files:
#             os.removedirs(path + '/' + file)  # delete this empty folder
#             print('[+]Deleting empty folder', path + '/' + file)
#     except:
#         a = ''


# plain code extraction, mainly for codes that contain a dash (-)
def get_numbers(paths):
    """Extract the code + episode for each path"""

    def get_number(filepath, absolute_path=False):
        """
        Get the code and the episode
        :param filepath:
        :param absolute_path:
        :return:
        """
        name = filepath.upper()  # uppercase
        if absolute_path:
            name = name.replace('\\', '/')
        # remove distracting segments
        name = PathNameProcessor.remove_distractions(name)
        # extract the episode number that may trail the file path, plus the path with it stripped
        suffix_episode, name = PathNameProcessor.extract_suffix_episode(name)
        # extract the episode that may follow the code, plus the normalized code
        episode_behind_code, code_number = PathNameProcessor.extract_code(name)
        # no code -> empty string
        code_number = code_number if code_number else ''
        # prefer the trailing episode; failing that, the episode behind the code (unlikely); failing both, empty
        episode = suffix_episode if suffix_episode else episode_behind_code if episode_behind_code else ''

        return code_number, episode

    maps = {}
    for path in paths:
        number, episode = get_number(path)
        maps[path] = (number, episode)

    return maps


def create_folder(paths):
    for path_to_make in paths:
        if path_to_make:
            try:
                os.makedirs(path_to_make)
            except FileExistsError as e:
                # name = f'{folder=}'.split('=')[0].split('.')[-1]
                print(path_to_make + " already exists")
                pass
            except Exception as exception:
                print('! Failed to create folder ' + path_to_make + ': the path is wrong or permissions are insufficient')
                raise exception
        else:
            raise Exception('! The folder path to create is empty, please check')


if __name__ == '__main__':
    version = '2.8.2'

    print('[*]================== AV Data Capture ===================')
    print('[*] Version ' + version)
    print('[*]======================================================')

    # UpdateCheck(version)

    CreatFailedFolder(config.failed_folder)
    os.chdir(os.getcwd())

    # create the folders
    create_folder([config.failed_folder, config.search_folder, config.temp_folder])

    # inside the temp folder, infos holds the per-code JSON data and pics holds the images
    path_infos = config.temp_folder + '/infos'
    path_pics = config.temp_folder + '/pics'

    create_folder([path_infos, path_pics])
    # walk every video path under the search folder
    movie_list = movie_lists(config.escape_folder)

    # the lines below load test data from a text file
    # f = open('TestPathNFO.txt', 'r')
    # f = open('TestPathSpecial.txt', 'r')
    # movie_list = [line[:-1] for line in f.readlines()]
    # f.close()

    # build a list of [code, episode, path]
    code_ep_paths = [[codeEposode[0], codeEposode[1], path] for path, codeEposode in get_numbers(movie_list).items()]
    [print(i) for i in code_ep_paths]

    # group the movie list by code (the key step), to find movies that share a code
    '''
    pandas grouping could be used here: "https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html"
    '''
    # # show all columns when printing
    # pd.set_option('display.max_columns', None)
    # # show all rows
    # pd.set_option('display.max_rows', None)
    # # set the display width of values to 100 (the default is 50)
    # pd.set_option('max_colwidth', 30)
    # # build the frame
    # df = pd.DataFrame(code_ep_paths, columns=('code', 'ep', 'path'))
    # # group by code
    # groupedCode_code_ep_paths = df.groupby(['code'])
    # # print(df.groupby(['code', 'ep']).describe().unstack())
    # grouped_code_ep = df.groupby(['code', 'ep'])['path']
    #

    sorted_code_list = sorted(code_ep_paths, key=lambda code_ep_path: code_ep_path[0])
    group_code_list = itertools.groupby(sorted_code_list, key=lambda code_ep_path: code_ep_path[0])

    def group_code_list_to_dict(group_code_list):
        data_dict = {}
        for code, code_ep_path_group in group_code_list:
            code_ep_path_list = list(code_ep_path_group)
            eps_of_code = {}
            group_ep_list = itertools.groupby(code_ep_path_list, key=lambda code_ep_path: code_ep_path[1])
            for ep, group_ep_group in group_ep_list:
                group_ep_list = list(group_ep_group)
                eps_of_code[ep] = [code_ep_path[2] for code_ep_path in group_ep_list]
            data_dict[code] = eps_of_code
        return data_dict

    def print_same_code_ep_path(data_dict_in):
        for code_in in data_dict_in:
            ep_path_list = data_dict_in[code_in]
            if len(ep_path_list) > 1:
                print('--' * 60)
                print("|" + (code_in if code_in else 'unknown') + ":")
                # group_ep_list = itertools.groupby(code_ep_path_list.items(), key=lambda code_ep_path: code_ep_path[0])
                for ep in ep_path_list:
                    path_list = ep_path_list[ep]
                    print('--' * 12)
                    ep = ep if ep else ' '
                    if len(path_list) == 1:
                        print('| Episode: ' + ep + ' file: ' + path_list[0])
                    else:
                        print('| Episode: ' + ep + ' files: ')
                        for path in path_list:
                            print('| ' + path)
            else:
                pass

    # the grouped data: {code: {ep: [path]}}
    data_dict_groupby_code_ep = group_code_list_to_dict(group_code_list)

    print('--' * 100)
    print("Movies found: " + str(len(movie_list)))
    print("Distinct codes: " + str(len(data_dict_groupby_code_ep)) + " (movies sharing a code are counted once; unrecognizable codes are all lumped under 'unknown')")
    print('Warning:!!!! the movies sharing a code are listed below')
    print('' + '--' * 80)
    print_same_code_ep_path(data_dict_groupby_code_ep)
    print('' + '--' * 80)

    isContinue = input('Press any key to continue, N to quit \n')
    if isContinue.strip(' ') == "N":
        exit(1)

    # ========== dragged-in stray code ==========
    # number_argparse = argparse_get_file()
    # if not number_argparse == '':
    #     print("[!]Making Data for [" + number_argparse + "], the number is [" + getNumber(number_argparse,
    #                                                                                      absolute_path=True) + "]")
    #     nfo = core_main(number_argparse, getNumber(number_argparse, absolute_path=True))
    #     print("[*]======================================================")
    #     CEF(config.success_folder)
    #     CEF(config.failed_folder)
    #     print("[+]All finished!!!")
    #     input("[+][+]Press enter key exit, you can check the error messge before you exit.")
    #     os._exit(0)
    # ========== dragged-in stray code ==========

    def download_code_infos(code_list, is_read_cache=True):
        """
        Walk the code-grouped collection, scrape each code's info, and cache it
        :param is_read_cache: whether to read cached data
        :param code_list:
        :return: {code:nfo}
        """
        count_all_grouped = len(code_list)
        count = 0
        code_info_dict = {}
        for code in code_list:
            count = count + 1
            percentage = str(count / int(count_all_grouped) * 100)[:4] + '%'
            print('[!] - ' + percentage + ' [' + str(count) + '/' + str(count_all_grouped) + '] -')
            try:
                print("[!]Scraping data for [" + code + "]")
                if code:
                    # the cache file for this code
                    file_path = path_infos + '/' + code + '.json'
                    nfo = {}
                    # read the cached info; scrape online if there is none
                    path = Path(file_path)
                    if is_read_cache and (path.exists() and path.is_file() and path.stat().st_size > 0):
                        print('Found cached info')
                        with open(file_path) as fp:
                            nfo = json.load(fp)
                    else:
                        # the core feature - fetch the info dict online
                        print('Scraping online')
                        nfo = core_main(code)
                        print('Writing', end='')

                        # write the info into the cache folder; this occasionally fails because
                        # the device is busy, so just retry
                        @retry(stop=stop_after_delay(3), wait=wait_fixed(2))
                        def read_file():
                            with open(file_path, 'w') as fp:
                                json.dump(nfo, fp)

                        read_file()
                        print('Done!')
                    # store the code's info in the dict
                    code_info_dict[code] = nfo
                print("[*]======================================================")
            except Exception as e:  # fetching this code's info failed
                code_info_dict[code] = ''
                print("No info found: " + code + ', Reason: ' + str(e))
                # if config.soft_link:
                #     print('[-]Link', file_path_name, 'to failed folder')
                #     os.symlink(file_path_name, config.failed_folder + '/')
                # else:
                #     try:
                #         print('[-]Move ' + file_path_name + ' to failed folder:' + config.failed_folder)
                #         shutil.move(file_path_name, config.failed_folder + '/')
                #     except FileExistsError:
                #         print('[!]File exists in failed!')
                #     except:
                #         print('[+]skip')
                continue
        return code_info_dict

    print('----------------------------------')
    code_infos = download_code_infos(data_dict_groupby_code_ep)
    print("---- codes with no data found ----")
    print([print(code) for code in code_infos if code_infos[code] == ''])
    print("-------------------------")

    def download_images_of_nfos(code_info_dict):
        """
        Walk the code info and download each movie's poster images
        :param code_info_dict:
        :return: the codes whose info contains no images
        """
        code_list_empty_image = []
        for code in code_info_dict:
            nfo = code_info_dict[code]
            if len(nfo.keys()) == 0:
                code_list_empty_image.append(code)
                continue
            code_pics_folder_to_save = path_pics + '/' + code
            # 1. create the per-code folder
            os.makedirs(code_pics_folder_to_save, exist_ok=True)
            # download the thumbnail
            if nfo['imagecut'] == 3:  # 3 means thumbnail
                path = Path(code_pics_folder_to_save + '/' + 'thumb.png')
                if path.exists() and path.is_file() and path.stat().st_size > 0:
                    print(code + ': thumbnail already cached')
                else:
                    print(code + ': downloading thumbnail...')
                    download_file(nfo['cover_small'], code_pics_folder_to_save, 'thumb.png')
                    print(code + ': thumbnail downloaded')
            # download the poster
            path = Path(code_pics_folder_to_save + '/' + 'poster.png')
            if path.exists() and path.is_file() and path.stat().st_size > 0:
                print(code + ': poster already cached')
            else:
                print(code + ': downloading poster...')
                download_file(nfo['cover'], code_pics_folder_to_save, 'poster.png')
                print(code + ': poster downloaded')
        return code_list_empty_image

    code_list_empty = download_images_of_nfos(code_infos)
    print("---- codes with no episode found ----")
    print([print(code) for code in code_list_empty])

    print("------ re-scraping the codes nothing was found for ------")
    code_infos_of_no_ep = download_code_infos(code_list_empty, is_read_cache=False)
    print("---- codes still with no data found ----")
    print([print(code) for code in code_infos_of_no_ep if code_infos_of_no_ep[code] == ''])
    print("----------------------")

    # start operating
    # # 2. create the thumbnail poster
    # if nfo['imagecut'] == 3:  # 3 means thumbnail
    #     download_cover_file(nfo['cover_small'], code, code_pics_folder_to_save)
    # # 3. create the image
    # download_image(nfo['cover'], code, code_pics_folder_to_save)
    # # 4. crop
    # crop_image(nfo['imagecut'], code, code_pics_folder_to_save)
    # # 5. background image
    # copy_images_to_background_image(code, code_pics_folder_to_save)
    # 6. create name.nfo (not needed; convert the JSON in infos into an nfo file when required)
    # make_nfo_file(nfo, code, temp_path_to_save)

    # handling duplicate codes: append -CD[X] by episode; split by video format and size
    # TODO approach 1: scrape and add nfo, covers, content screenshots, etc.
    # 6. create name.nfo (not needed; convert the JSON in infos into an nfo file when required)
    # make_nfo_file(nfo, code, temp_path_to_save)  # commented out: nfo, code and temp_path_to_save are undefined at this point
    # TODO approach 2: organize: move movies and subtitles by rule into actor / studio / censored-or-not folders, etc.
    # if config.program_mode == '1':
    #     if multi_part == 1:
    #         number += part  # number gets its CD1 suffix appended here
    #     smallCoverCheck(path, number, imagecut, json_data['cover_small'], c_word, option, filepath, config.failed_folder)  # check the small cover
    #     imageDownload(option, json_data['cover'], number, c_word, path, multi_part, filepath, config.failed_folder)  # creatFoder returns the code path
    #     cutImage(option, imagecut, path, number, c_word)  # crop the image
    #     copyRenameJpgToBackdrop(option, path, number, c_word)
    #     PrintFiles(option, path, c_word, json_data['naming_rule'], part, cn_sub, json_data, filepath, config.failed_folder, tag)  # write the .nfo file
    #     pasteFileToFolder(filepath, path, number, c_word)  # move the file
    # # ======================================================================= organize mode
    # elif config.program_mode == '2':
    #     pasteFileToFolder_mode2(filepath, path, multi_part, number, part, c_word)  # move the file

    # CEF(config.success_folder)
    # CEF(config.failed_folder)
    print("[+]All finished!!!")
    input("[+][+]Press enter key exit, you can check the error message before you exit.")
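
The grouping above hinges on `itertools.groupby` merging only adjacent keys, which is why the list is sorted first. A self-contained sketch of the same `{code: {ep: [path]}}` shape, with invented codes and paths:

```python
import itertools

rows = [['ABC-123', '1', 'a/ABC-123-1.mp4'],
        ['ABC-123', '2', 'a/ABC-123-2.mp4'],
        ['XYZ-042', '', 'b/XYZ-042.mkv']]
rows.sort(key=lambda r: (r[0], r[1]))  # groupby only merges adjacent keys
grouped = {code: {ep: [r[2] for r in grp]
                  for ep, grp in itertools.groupby(group, key=lambda r: r[1])}
           for code, group in itertools.groupby(rows, key=lambda r: r[0])}
print(grouped)
# {'ABC-123': {'1': ['a/ABC-123-1.mp4'], '2': ['a/ABC-123-2.mp4']}, 'XYZ-042': {'': ['b/XYZ-042.mkv']}}
```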

28
ConfigApp.py Executable file
View File

@@ -0,0 +1,28 @@
from configparser import ConfigParser

from MediaServer import MediaServer


class ConfigApp:
    def __init__(self):
        config_file = 'config.ini'
        config = ConfigParser()
        config.read(config_file, encoding='UTF-8')
        self.success_folder = config['common']['success_output_folder']
        self.failed_folder = config['common']['failed_output_folder']  # failed-output directory
        self.escape_folder = config['escape']['folders']  # directories to exclude when scraping nested directories
        self.search_folder = config['common']['search_folder']  # search path
        self.temp_folder = config['common']['temp_folder']  # temporary resource path
        self.soft_link = (config['common']['soft_link'] == '1')  # ConfigParser values are strings, so compare against '1'
        # self.escape_literals = (config['escape']['literals'] == 1)
        self.naming_rule = config['Name_Rule']['naming_rule']
        self.location_rule = config['Name_Rule']['location_rule']
        self.proxy = config['proxy']['proxy']
        self.timeout = float(config['proxy']['timeout'])
        self.retry = int(config['proxy']['retry'])
        self.media_server = MediaServer[config['media']['media_warehouse'].upper()]  # enum lookup needs the uppercase member name
        self.update_check = config['update']['update_check']
        self.debug_mode = config['debug_mode']['switch']
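
A short sketch of how the other scripts consume this class; it assumes a config.ini containing every referenced section sits next to the script:

```python
from ConfigApp import ConfigApp

config = ConfigApp()  # raises if config.ini or any referenced key is missing
print(config.search_folder, config.failed_folder, config.temp_folder)
print(config.proxy or 'no proxy configured', config.timeout, config.retry)
```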

0
LICENSE Normal file → Executable file
View File


@@ -0,0 +1,19 @@
import pandas as pd
import numpy as np
df = pd.DataFrame({'A': ['foo', 'bar', 'foo', 'bar',
                         'foo', 'bar', 'foo', 'foo'],
                   'B': ['one', 'one', 'two', 'three',
                         'two', 'two', 'one', 'three'],
                   'C': np.random.randn(8),
                   'D': np.random.randn(8)})
print(df)
groupedA = df.groupby('A').describe()
groupedAB = df.groupby(['A', 'B'])['C']
print('---'*18)
for a, b in groupedAB:
print('--'*18)
print(a)
print('-' * 18)
print(b)


@@ -0,0 +1,38 @@
import pandas as pd
import numpy as np
'''
pandas, one of the three musketeers of Python data processing
https://pandas.pydata.org/pandas-docs/stable/user_guide
https://www.pypandas.cn/docs/getting_started/10min.html
'''
dates = pd.date_range('20130101', periods=6)
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))
print(dates)
print(df)
df2 = pd.DataFrame({'A': 1.,
                    'B': pd.Timestamp('20130102'),
                    'C': pd.Series(1, index=list(range(4)), dtype='float32'),
                    'D': np.array([3] * 4, dtype='int32'),
                    'E': pd.Categorical(["test", "train", "test", "train"]),
                    'F': 'foo'})
print(df2)
print(df2.dtypes)
print(df.head())
print(df.tail(5))
print(df.index)
print(df.columns)
df.describe()  # summary statistics
df.T  # transpose index and columns
df.sort_index(axis=1, ascending=False)  # sort labels: axis=1 sorts columns, axis=0 sorts the index
df.sort_values(by='B')  # sort by values, here the values in column B

# select a column
df.A
df['A']
# slice rows
df['20130102':'20130104']
df[0:3]

28
MediaServer.py Normal file

@@ -0,0 +1,28 @@
from enum import Enum, auto


class MediaServer(Enum):
    EMBY = auto()
    PLEX = auto()
    KODI = auto()

    # media = EMBY
    #
    # def __init__(self, arg):
    #     self = [e for e in MediaServer if arg.upper() == self.name]

    def poster_name(self, name):
        if self == MediaServer.EMBY:  # save as [name].png
            return name + '.png'
        elif self == MediaServer.KODI:  # save as [name]-poster.jpg
            return name + '-poster.jpg'
        elif self == MediaServer.PLEX:  # save as poster.jpg
            return 'poster.jpg'

    def image_name(self, name):
        if self == MediaServer.EMBY:  # name.jpg
            return name + '.jpg'
        elif self == MediaServer.KODI:  # [name]-fanart.jpg
            return name + '-fanart.jpg'
        elif self == MediaServer.PLEX:  # fanart.jpg
            return 'fanart.jpg'
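
A quick illustration of the per-server naming; the lookup by member name requires an uppercase key such as 'EMBY':

```python
from MediaServer import MediaServer

server = MediaServer['EMBY']  # lookup by member name
print(server.poster_name('ABC-123'))  # ABC-123.png
print(MediaServer.KODI.poster_name('ABC-123'))  # ABC-123-poster.jpg
print(MediaServer.PLEX.image_name('ABC-123'))  # fanart.jpg
```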

3
Metadate.py Normal file

@@ -0,0 +1,3 @@
from addict import Dict
# class Metadata:

115
PathNameProcessor.py Normal file

@@ -0,0 +1,115 @@
import re

import fuckit


class PathNameProcessor:
    # class variable
    pattern_of_file_name_suffixes = r'\.(mov|mp4|avi|rmvb|wmv|mov|mkv|flv|ts|m2ts)$'

    # def __init__(self):

    @staticmethod
    def remove_distractions(origin_name):
        """Remove distracting elements"""
        # strip the file-type suffix
        origin_name = re.sub(PathNameProcessor.pattern_of_file_name_suffixes, '', origin_name, 0, re.IGNORECASE)
        # normalize codes containing dashes and underscores, e.g. '/-070409_621'
        origin_name = re.sub(r'[-_~*# ]', "-", origin_name, 0)
        origin_name = re.sub(r'(Carib)(bean)?', '-', origin_name, 0, re.IGNORECASE)
        origin_name = re.sub(r'(1pondo)', '-', origin_name, 0, re.IGNORECASE)
        origin_name = re.sub(r'(tokyo)[-. ]?(hot)', '-', origin_name, 0, re.IGNORECASE)
        origin_name = re.sub(r'Uncensored', '-', origin_name, 0, re.IGNORECASE)
        origin_name = re.sub(r'JAV', '-', origin_name, 0, re.IGNORECASE)
        # remove distracting tokens
        origin_name = origin_name.replace('22-sht.me', '-')
        # strip dates from the file name: years 1970-2099, month, day
        pattern_of_date = r'(?:-)(19[789]\d|20\d{2})(-?(0\d|1[012])-?(0[1-9]|[12]\d|3[01])?)?[-.]'
        # resolution tokens that start with a letter
        pattern_of_resolution_alphas = r'(?<![a-zA-Z])(SD|((F|U)|(Full|Ultra)[-_*. ~]?)?HD|BD|(blu[-_*. ~]?ray)|[hx]264|[hx]265|HEVC)'
        # resolution tokens that start with a digit
        pattern_of_resolution_numbers = r'(?<!\d)(4K|(1080[ip])|(720p)|(480p))'
        origin_name = re.sub(pattern_of_resolution_alphas, "-", origin_name, 0, re.IGNORECASE)
        origin_name = re.sub(pattern_of_resolution_numbers, "-", origin_name, 0, re.IGNORECASE)
        origin_name = re.sub(pattern_of_date, "-", origin_name)
        if 'FC2' in origin_name or 'fc2' in origin_name:
            origin_name = origin_name.replace('-PPV', '').replace('PPV-', '').replace('FC2PPV-', 'FC2-').replace(
                'FC2PPV_', 'FC2-')
        # collapse consecutive repeats of meaningless symbols (- and .)
        origin_name = re.sub(r"([-.])(\1+)", r"\1", origin_name)
        # strip trailing meaningless symbols so the episode number is easier to recognize
        origin_name = re.sub(r'[-.]+$', "", origin_name)
        return origin_name

    @staticmethod
    def extract_suffix_episode(origin_name):
        """Extract the trailing episode marker: 123/ABC (one character only), part1, ipz.A, CD1, NOP019B.HD.wmv"""
        episode = None
        with fuckit:
            # zero-width assertion for a trailing digit as the episode number, e.g. 123
            pattern_episodes_number = r'(?<!\d)\d$'
            episode = re.findall(pattern_episodes_number, origin_name)[-1]
            origin_name = re.sub(pattern_episodes_number, "", origin_name)
        with fuckit:
            # zero-width assertion for a trailing letter as the episode marker, e.g. abc
            pattern_episodes_alpha = r'(?<![a-zA-Z])[a-zA-Z]$'
            episode = re.findall(pattern_episodes_alpha, origin_name)[-1]
            origin_name = re.sub(pattern_episodes_alpha, "", origin_name)
        return episode, origin_name

    @staticmethod
    def extract_code(origin_name):
        """
        Extract the episode and the normalized code
        """
        name = None
        episode = None
        with fuckit:
            # find the code, with or without a dash: 1. digits+digits 2. letters+digits
            name = re.findall(r'(?:\d{2,}-\d{2,})|(?:[A-Z]+-?[A-Z]*\d{2,})', origin_name)[-1]
            episode = PathNameProcessor.extract_episode_behind_code(origin_name, name)
            # give dashless names a dash
            if not ('-' in name):
                # a code without a dash: try to split it and insert one
                # non-greedy match, at least 2 consecutive digits: ipz221.part2, mide072hhb, n1180
                with fuckit:
                    name = re.findall(r'[a-zA-Z]+\d{2,}', name)[-1]
                # e.g. MCDV-47 and mcdv-047 are two different titles, while SIVR-00008 and SIVR-008 are
                # the same one; HEYZO is the exception, with four digits
                if "heyzo" not in name.lower():
                    name = re.sub(r'([a-zA-Z]{2,})(?:0*?)(\d{2,})', r'\1-\2', name)
            # regex for dashed codes [letters-[letters]digits], always more than 2 digits; take the last match
            with fuckit:
                # MKBD_S03-MaRieS
                name = re.findall(r'[a-zA-Z|\d]+-[a-zA-Z|\d]*\d{2,}', name)[-1]
                # 107NTTR-037 -> NTTR-037 , SIVR-00008 -> SIVR-008, HEYZO excepted
                if "heyzo" not in name.lower():
                    searched = re.search(r'([a-zA-Z]{2,})-(?:0*)(\d{3,})', name)
                    if searched:
                        name = '-'.join(searched.groups())
        return episode, name

    @staticmethod
    def extract_episode_behind_code(origin_name, code):
        episode = None
        with fuckit:
            # zero-width assertion for the episode marker right behind the code, e.g. abc123
            result_dict = re.search(rf'(?<={code})-?((?P<alpha>([A-Z](?![A-Z])))|(?P<num>\d(?!\d)))', origin_name,
                                    re.I).groupdict()
            episode = result_dict['alpha'] or result_dict['num']
        return episode


def safe_list_get(list_in, idx, default):
    try:
        return list_in[idx]
    except IndexError:
        return default
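
To see the three stages work together, a small driver mirroring get_number() in AV_Data_Capture.py; the file names are invented:

```python
from PathNameProcessor import PathNameProcessor

for raw in ['ABC-123-2.mp4', 'mide072hhb.mkv']:
    name = PathNameProcessor.remove_distractions(raw.upper())
    suffix_ep, name = PathNameProcessor.extract_suffix_episode(name)
    behind_ep, code = PathNameProcessor.extract_code(name)
    print(raw, '->', code, suffix_ep or behind_ep)
# ABC-123-2.mp4 -> ABC-123 2
# mide072hhb.mkv -> MIDE-072 None
```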

19
Pipfile Normal file

@@ -0,0 +1,19 @@
[[source]]
name = "pypi"
url = "https://pypi.org/simple"
verify_ssl = true
[dev-packages]
[packages]
bs4 = "*"
tenacity = "*"
fuckit = "*"
requests = "*"
image = "*"
lazyxml = {editable = true,git = "https://github.com/waynedyck/lazyxml.git",ref = "python-3-conversion_wd1"}
lxml = "*"
pyquery = "*"
[requires]
python_version = "3.8"

246
Pipfile.lock generated Normal file

@@ -0,0 +1,246 @@
{
"_meta": {
"hash": {
"sha256": "15bf3c6af3ec315358a0217481a13285f95fc742bb5db8a1f934e0d1c3d7d5e2"
},
"pipfile-spec": 6,
"requires": {
"python_version": "3.8"
},
"sources": [
{
"name": "pypi",
"url": "https://pypi.org/simple",
"verify_ssl": true
}
]
},
"default": {
"asgiref": {
"hashes": [
"sha256:5ee950735509d04eb673bd7f7120f8fa1c9e2df495394992c73234d526907e17",
"sha256:7162a3cb30ab0609f1a4c95938fd73e8604f63bdba516a7f7d64b83ff09478f0"
],
"markers": "python_version >= '3.5'",
"version": "==3.3.1"
},
"beautifulsoup4": {
"hashes": [
"sha256:4c98143716ef1cb40bf7f39a8e3eec8f8b009509e74904ba3a7b315431577e35",
"sha256:84729e322ad1d5b4d25f805bfa05b902dd96450f43842c4e99067d5e1369eb25",
"sha256:fff47e031e34ec82bf17e00da8f592fe7de69aeea38be00523c04623c04fb666"
],
"version": "==4.9.3"
},
"bs4": {
"hashes": [
"sha256:36ecea1fd7cc5c0c6e4a1ff075df26d50da647b75376626cc186e2212886dd3a"
],
"index": "pypi",
"version": "==0.0.1"
},
"certifi": {
"hashes": [
"sha256:1a4995114262bffbc2413b159f2a1a480c969de6e6eb13ee966d470af86af59c",
"sha256:719a74fb9e33b9bd44cc7f3a8d94bc35e4049deebe19ba7d8e108280cfd59830"
],
"version": "==2020.12.5"
},
"chardet": {
"hashes": [
"sha256:0d6f53a15db4120f2b08c94f11e7d93d2c911ee118b6b30a04ec3ee8310179fa",
"sha256:f864054d66fd9118f2e67044ac8981a54775ec5b67aed0441892edb553d21da5"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4'",
"version": "==4.0.0"
},
"cssselect": {
"hashes": [
"sha256:f612ee47b749c877ebae5bb77035d8f4202c6ad0f0fc1271b3c18ad6c4468ecf",
"sha256:f95f8dedd925fd8f54edb3d2dfb44c190d9d18512377d3c1e2388d16126879bc"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==1.1.0"
},
"django": {
"hashes": [
"sha256:2d78425ba74c7a1a74b196058b261b9733a8570782f4e2828974777ccca7edf7",
"sha256:efa2ab96b33b20c2182db93147a0c3cd7769d418926f9e9f140a60dca7c64ca9"
],
"markers": "python_version >= '3.6'",
"version": "==3.1.5"
},
"fuckit": {
"hashes": [
"sha256:059488e6aa2053da9db5eb5101e2498f608314da5118bf2385acb864568ccc25"
],
"index": "pypi",
"version": "==4.8.1"
},
"idna": {
"hashes": [
"sha256:b307872f855b18632ce0c21c5e45be78c0ea7ae4c15c828c20788b26921eb3f6",
"sha256:b97d804b1e9b523befed77c48dacec60e6dcb0b5391d57af6a65a312a90648c0"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==2.10"
},
"image": {
"hashes": [
"sha256:baa2e09178277daa50f22fd6d1d51ec78f19c12688921cb9ab5808743f097126"
],
"index": "pypi",
"version": "==1.5.33"
},
"lazyxml": {
"editable": true,
"git": "https://github.com/waynedyck/lazyxml.git",
"ref": "f42ea4a4febf4c1e120b05d6ca9cef42556a75d5"
},
"lxml": {
"hashes": [
"sha256:0448576c148c129594d890265b1a83b9cd76fd1f0a6a04620753d9a6bcfd0a4d",
"sha256:127f76864468d6630e1b453d3ffbbd04b024c674f55cf0a30dc2595137892d37",
"sha256:1471cee35eba321827d7d53d104e7b8c593ea3ad376aa2df89533ce8e1b24a01",
"sha256:2363c35637d2d9d6f26f60a208819e7eafc4305ce39dc1d5005eccc4593331c2",
"sha256:2e5cc908fe43fe1aa299e58046ad66981131a66aea3129aac7770c37f590a644",
"sha256:2e6fd1b8acd005bd71e6c94f30c055594bbd0aa02ef51a22bbfa961ab63b2d75",
"sha256:366cb750140f221523fa062d641393092813b81e15d0e25d9f7c6025f910ee80",
"sha256:42ebca24ba2a21065fb546f3e6bd0c58c3fe9ac298f3a320147029a4850f51a2",
"sha256:4e751e77006da34643ab782e4a5cc21ea7b755551db202bc4d3a423b307db780",
"sha256:4fb85c447e288df535b17ebdebf0ec1cf3a3f1a8eba7e79169f4f37af43c6b98",
"sha256:50c348995b47b5a4e330362cf39fc503b4a43b14a91c34c83b955e1805c8e308",
"sha256:535332fe9d00c3cd455bd3dd7d4bacab86e2d564bdf7606079160fa6251caacf",
"sha256:535f067002b0fd1a4e5296a8f1bf88193080ff992a195e66964ef2a6cfec5388",
"sha256:5be4a2e212bb6aa045e37f7d48e3e1e4b6fd259882ed5a00786f82e8c37ce77d",
"sha256:60a20bfc3bd234d54d49c388950195d23a5583d4108e1a1d47c9eef8d8c042b3",
"sha256:648914abafe67f11be7d93c1a546068f8eff3c5fa938e1f94509e4a5d682b2d8",
"sha256:681d75e1a38a69f1e64ab82fe4b1ed3fd758717bed735fb9aeaa124143f051af",
"sha256:68a5d77e440df94011214b7db907ec8f19e439507a70c958f750c18d88f995d2",
"sha256:69a63f83e88138ab7642d8f61418cf3180a4d8cd13995df87725cb8b893e950e",
"sha256:6e4183800f16f3679076dfa8abf2db3083919d7e30764a069fb66b2b9eff9939",
"sha256:6fd8d5903c2e53f49e99359b063df27fdf7acb89a52b6a12494208bf61345a03",
"sha256:791394449e98243839fa822a637177dd42a95f4883ad3dec2a0ce6ac99fb0a9d",
"sha256:7a7669ff50f41225ca5d6ee0a1ec8413f3a0d8aa2b109f86d540887b7ec0d72a",
"sha256:7e9eac1e526386df7c70ef253b792a0a12dd86d833b1d329e038c7a235dfceb5",
"sha256:7ee8af0b9f7de635c61cdd5b8534b76c52cd03536f29f51151b377f76e214a1a",
"sha256:8246f30ca34dc712ab07e51dc34fea883c00b7ccb0e614651e49da2c49a30711",
"sha256:8c88b599e226994ad4db29d93bc149aa1aff3dc3a4355dd5757569ba78632bdf",
"sha256:923963e989ffbceaa210ac37afc9b906acebe945d2723e9679b643513837b089",
"sha256:94d55bd03d8671686e3f012577d9caa5421a07286dd351dfef64791cf7c6c505",
"sha256:97db258793d193c7b62d4e2586c6ed98d51086e93f9a3af2b2034af01450a74b",
"sha256:a9d6bc8642e2c67db33f1247a77c53476f3a166e09067c0474facb045756087f",
"sha256:cd11c7e8d21af997ee8079037fff88f16fda188a9776eb4b81c7e4c9c0a7d7fc",
"sha256:d8d3d4713f0c28bdc6c806a278d998546e8efc3498949e3ace6e117462ac0a5e",
"sha256:e0bfe9bb028974a481410432dbe1b182e8191d5d40382e5b8ff39cdd2e5c5931",
"sha256:f4822c0660c3754f1a41a655e37cb4dbbc9be3d35b125a37fab6f82d47674ebc",
"sha256:f83d281bb2a6217cd806f4cf0ddded436790e66f393e124dfe9731f6b3fb9afe",
"sha256:fc37870d6716b137e80d19241d0e2cff7a7643b925dfa49b4c8ebd1295eb506e"
],
"index": "pypi",
"version": "==4.6.2"
},
"pillow": {
"hashes": [
"sha256:165c88bc9d8dba670110c689e3cc5c71dbe4bfb984ffa7cbebf1fac9554071d6",
"sha256:1d208e670abfeb41b6143537a681299ef86e92d2a3dac299d3cd6830d5c7bded",
"sha256:22d070ca2e60c99929ef274cfced04294d2368193e935c5d6febfd8b601bf865",
"sha256:2353834b2c49b95e1313fb34edf18fca4d57446675d05298bb694bca4b194174",
"sha256:39725acf2d2e9c17356e6835dccebe7a697db55f25a09207e38b835d5e1bc032",
"sha256:3de6b2ee4f78c6b3d89d184ade5d8fa68af0848f9b6b6da2b9ab7943ec46971a",
"sha256:47c0d93ee9c8b181f353dbead6530b26980fe4f5485aa18be8f1fd3c3cbc685e",
"sha256:5e2fe3bb2363b862671eba632537cd3a823847db4d98be95690b7e382f3d6378",
"sha256:604815c55fd92e735f9738f65dabf4edc3e79f88541c221d292faec1904a4b17",
"sha256:6c5275bd82711cd3dcd0af8ce0bb99113ae8911fc2952805f1d012de7d600a4c",
"sha256:731ca5aabe9085160cf68b2dbef95fc1991015bc0a3a6ea46a371ab88f3d0913",
"sha256:7612520e5e1a371d77e1d1ca3a3ee6227eef00d0a9cddb4ef7ecb0b7396eddf7",
"sha256:7916cbc94f1c6b1301ac04510d0881b9e9feb20ae34094d3615a8a7c3db0dcc0",
"sha256:81c3fa9a75d9f1afafdb916d5995633f319db09bd773cb56b8e39f1e98d90820",
"sha256:887668e792b7edbfb1d3c9d8b5d8c859269a0f0eba4dda562adb95500f60dbba",
"sha256:93a473b53cc6e0b3ce6bf51b1b95b7b1e7e6084be3a07e40f79b42e83503fbf2",
"sha256:96d4dc103d1a0fa6d47c6c55a47de5f5dafd5ef0114fa10c85a1fd8e0216284b",
"sha256:a3d3e086474ef12ef13d42e5f9b7bbf09d39cf6bd4940f982263d6954b13f6a9",
"sha256:b02a0b9f332086657852b1f7cb380f6a42403a6d9c42a4c34a561aa4530d5234",
"sha256:b09e10ec453de97f9a23a5aa5e30b334195e8d2ddd1ce76cc32e52ba63c8b31d",
"sha256:b6f00ad5ebe846cc91763b1d0c6d30a8042e02b2316e27b05de04fa6ec831ec5",
"sha256:bba80df38cfc17f490ec651c73bb37cd896bc2400cfba27d078c2135223c1206",
"sha256:c3d911614b008e8a576b8e5303e3db29224b455d3d66d1b2848ba6ca83f9ece9",
"sha256:ca20739e303254287138234485579b28cb0d524401f83d5129b5ff9d606cb0a8",
"sha256:cb192176b477d49b0a327b2a5a4979552b7a58cd42037034316b8018ac3ebb59",
"sha256:cdbbe7dff4a677fb555a54f9bc0450f2a21a93c5ba2b44e09e54fcb72d2bd13d",
"sha256:cf6e33d92b1526190a1de904df21663c46a456758c0424e4f947ae9aa6088bf7",
"sha256:d355502dce85ade85a2511b40b4c61a128902f246504f7de29bbeec1ae27933a",
"sha256:d673c4990acd016229a5c1c4ee8a9e6d8f481b27ade5fc3d95938697fa443ce0",
"sha256:dc577f4cfdda354db3ae37a572428a90ffdbe4e51eda7849bf442fb803f09c9b",
"sha256:dd9eef866c70d2cbbea1ae58134eaffda0d4bfea403025f4db6859724b18ab3d",
"sha256:f50e7a98b0453f39000619d845be8b06e611e56ee6e8186f7f60c3b1e2f0feae"
],
"markers": "python_version >= '3.6'",
"version": "==8.1.0"
},
"pyquery": {
"hashes": [
"sha256:1fc33b7699455ed25c75282bc8f80ace1ac078b0dda5a933dacbd8b1c1f83963",
"sha256:a388eefb6bc4a55350de0316fbd97cda999ae669b6743ae5b99102ba54f5aa72"
],
"index": "pypi",
"version": "==1.4.3"
},
"pytz": {
"hashes": [
"sha256:16962c5fb8db4a8f63a26646d8886e9d769b6c511543557bc84e9569fb9a9cb4",
"sha256:180befebb1927b16f6b57101720075a984c019ac16b1b7575673bea42c6c3da5"
],
"version": "==2020.5"
},
"requests": {
"hashes": [
"sha256:27973dd4a904a4f13b263a19c866c13b92a39ed1c964655f025f3f8d3d75b804",
"sha256:c210084e36a42ae6b9219e00e48287def368a26d03a048ddad7bfee44f75871e"
],
"index": "pypi",
"version": "==2.25.1"
},
"six": {
"hashes": [
"sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259",
"sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'",
"version": "==1.15.0"
},
"soupsieve": {
"hashes": [
"sha256:4bb21a6ee4707bf43b61230e80740e71bfe56e55d1f1f50924b087bb2975c851",
"sha256:6dc52924dc0bc710a5d16794e6b3480b2c7c08b07729505feab2b2c16661ff6e"
],
"markers": "python_version >= '3.0'",
"version": "==2.1"
},
"sqlparse": {
"hashes": [
"sha256:017cde379adbd6a1f15a61873f43e8274179378e95ef3fede90b5aa64d304ed0",
"sha256:0f91fd2e829c44362cbcfab3e9ae12e22badaa8a29ad5ff599f9ec109f0454e8"
],
"markers": "python_version >= '3.5'",
"version": "==0.4.1"
},
"tenacity": {
"hashes": [
"sha256:baed357d9f35ec64264d8a4bbf004c35058fad8795c5b0d8a7dc77ecdcbb8f39",
"sha256:e14d191fb0a309b563904bbc336582efe2037de437e543b38da749769b544d7f"
],
"index": "pypi",
"version": "==6.3.1"
},
"urllib3": {
"hashes": [
"sha256:19188f96923873c92ccb987120ec4acaa12f0461fa9ce5d3d0772bc965a39e08",
"sha256:d8ff90d979214d7b4f8ce956e80f4028fc6860e4431f731ea4a8c08f23f99473"
],
"markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4' and python_version < '4'",
"version": "==1.26.2"
}
},
"develop": {}
}

337
README.md Normal file → Executable file

@ -1,15 +1,24 @@
# AV Data Capture # AV Data Capture (CLI)
CLI 版本
<a title="Hits" target="_blank" href="https://github.com/yoshiko2/AV_Data_Capture"><img src="https://hits.b3log.org/yoshiko2/AV_Data_Capture.svg"></a> <a title="Hits" target="_blank" href="https://github.com/yoshiko2/AV_Data_Capture"><img src="https://hits.b3log.org/yoshiko2/AV_Data_Capture.svg"></a>
![](https://img.shields.io/badge/build-passing-brightgreen.svg?style=flat-square) ![](https://img.shields.io/badge/build-passing-brightgreen.svg?style=flat-square)
![](https://img.shields.io/github/downloads/yoshiko2/av_data_capture/total.svg?style=flat-square)<br> ![](https://img.shields.io/github/downloads/yoshiko2/av_data_capture/total.svg?style=flat-square)
![](https://img.shields.io/github/license/yoshiko2/av_data_capture.svg?style=flat-square) ![](https://img.shields.io/github/license/yoshiko2/av_data_capture.svg?style=flat-square)
![](https://img.shields.io/github/release/yoshiko2/av_data_capture.svg?style=flat-square) ![](https://img.shields.io/github/release/yoshiko2/av_data_capture.svg?style=flat-square)
![](https://img.shields.io/badge/Python-3.7-yellow.svg?style=flat-square&logo=python)<br> ![](https://img.shields.io/badge/Python-3.7-yellow.svg?style=flat-square&logo=python)<br>
[GUI 版本](https://github.com/moyy996/AVDC)
<a title="Hits" target="_blank" href="https://github.com/moyy996/avdc"><img src="https://hits.b3log.org/moyy996/AVDC.svg"></a>
![](https://img.shields.io/badge/build-passing-brightgreen.svg?style=flat-square)
![](https://img.shields.io/github/downloads/moyy996/avdc/total.svg?style=flat-square)
![](https://img.shields.io/github/license/moyy996/avdc.svg?style=flat-square)
![](https://img.shields.io/github/release/moyy996/avdc.svg?style=flat-square)
![](https://img.shields.io/badge/Python-3.6-yellow.svg?style=flat-square&logo=python)
![](https://img.shields.io/badge/Pyqt-5-blue.svg?style=flat-square)<br>
**日本电影元数据 抓取工具 | 刮削器**配合本地影片管理软件EMBY,KODI等管理本地影片该软件起到分类与元数据抓取作用利用元数据信息来分类供本地影片分类整理使用。 **日本电影元数据 抓取工具 | 刮削器**,配合本地影片管理软件 Emby, Jellyfin, Kodi 等管理本地影片该软件起到分类与元数据metadata抓取作用利用元数据信息来分类供本地影片分类整理使用。
##### 本地电影刮削与整理一体化解决方案
# 目录 # 目录
* [声明](#声明) * [声明](#声明)
@ -17,112 +26,115 @@
* [故事](#故事) * [故事](#故事)
* [效果图](#效果图) * [效果图](#效果图)
* [如何使用](#如何使用) * [如何使用](#如何使用)
* [下载](#下载) * [下载](#下载)
* [简明教程](#简要教程) * [简要教程](#简要教程)
* [模块安装](#1模块安装) * [完整文档](#完整文档)
* [配置](#2配置configini) * [模块安装](#模块安装)
* [多目录影片处理](#4多目录影片处理) * [配置](#配置configini)
* [多集影片处理](#多集影片处理) * [多目录影片处理](#多目录影片处理)
* [(可选)设置自定义目录和影片重命名规则](#3可选设置自定义目录和影片重命名规则) * [多集影片处理](#多集影片处理)
* [运行软件](#5运行-av_data_capturepyexe) * [中文字幕处理](#中文字幕处理)
* [影片原路径处理](#4建议把软件拷贝和电影的统一目录下) * [异常处理(重要)](#异常处理重要)
* [异常处理(重要)](#51异常处理重要) * [写在后面](#写在后面)
* [导入至媒体库](#7把jav_output文件夹导入到embykodi中等待元数据刷新完成)
* [关于群晖NAS](#8关于群晖NAS)
* [写在后面](#9写在后面)
# 声明 # 声明
* 本软件仅供**技术交流,学术交流**使用<br> * 本软件仅供**技术交流,学术交流**使用
* 本软件作者编写出该软件旨在学习Python3提高编程水平<br> * 本软件作者编写出该软件旨在学习 Python ,提高编程水平
* 用户在使用该软件前,请用户自觉遵守当地法律法规,如果该软件使用过程中存在违反当地法律法规的行为,请勿使用该软件<br> * 用户在使用本软件前,请用户自觉遵守当地法律法规,如果本软件使用过程中存在违反当地法律法规的行为,请勿使用该软件
* 用户使用该软件时,若产生一切违法行为由用户承担<br> * 用户在使用本软件时,若产生一切违法行为由用户承担
* 严禁用户使用于商业和个人其他意图<br> * 严禁用户将本软件使用于商业和个人其他意图
* 本软件作者保留最终决定权和最终解释权<br> * 本软件作者保留最终决定权和最终解释权
**若用户不同意上述条款任意一条,请勿使用该软件**<br> **若用户不同意上述条款任意一条,请勿使用本软件**
# FAQ # FAQ
### 软件能下片吗? ### 软件能下片吗?
* 软件不提供任何影片下载地址,仅供本地影片分类整理使用 * 软件不提供任何影片下载地址,仅供本地影片分类整理使用
### 什么是元数据? ### 什么是元数据metadata
* 元数据包括了影片的封面,导演,演员,简介,类型...... * 元数据包括了影片的封面,导演,演员,简介,类型......
### 软件收费吗? ### 软件收费吗?
* 软件永久免费。**除了作者钦点以外** * 本软件永久免费,**除了作者<ruby><rt>yìng</rt></ruby>点以外**
### 软件运行异常怎么办? ### 软件运行异常怎么办?
* 认真看 [异常处理(重要)](#5异常处理重要) * 认真看 [异常处理(重要)](#异常处理重要)
### 为什么软件要单线程运行?
# 故事 * 多线程爬取可能会触发网站反爬机制,同时也违背了些道德,故单线程运行
[点击跳转至作者博客文章](https://yoshiko2.github.io/2019/10/18/AVDC/)
# 效果图 # 效果图
**图片来自网络**由于相关法律法规,具体效果请自行联想 **图片来自网络**图片仅供参考,具体效果请自行联想
![](https://i.loli.net/2019/07/04/5d1cf9bb1b08b86592.jpg) ![preview_picture_1](https://i.loli.net/2019/07/04/5d1cf9bb1b08b86592.jpg)
![](https://i.loli.net/2019/07/04/5d1cf9bb2696937880.jpg)<br> ![preview_picture_2](https://i.loli.net/2019/07/04/5d1cf9bb2696937880.jpg)
# 如何使用 # 如何使用
### 下载 ## 下载
* release的程序可脱离**python环境**运行,可跳过 [模块安装](#1请安装模块在cmd终端逐条输入以下命令安装)<br>Release 下载地址(**仅限Windows**):<br>[![](https://img.shields.io/badge/%E4%B8%8B%E8%BD%BD-windows-blue.svg?style=for-the-badge&logo=windows)](https://github.com/yoshiko2/AV_Data_Capture/releases)<br> * release的程序可脱离**python环境**运行,可跳过 [模块安装](#模块安装)
* Linux,MacOS请下载源码包运行 ### Windows
Release 下载地址(**仅限Windows**):
* Windows Python环境:[点击前往](https://www.python.org/downloads/windows/) 选中executable installer下载 [![](https://img.shields.io/badge/%E4%B8%8B%E8%BD%BD-windows-blue.svg?style=for-the-badge&logo=windows)](https://github.com/yoshiko2/AV_Data_Capture/releases)
* MacOS Python环境[点击前往](https://www.python.org/downloads/mac-osx/)
* Linux Python环境Linux用户懂的吧不解释下载地址
### 简要教程:<br>
**1.把软件拉到和电影的同一目录<br>2.设置ini文件的代理路由器拥有自动代理功能的可以把proxy=后面内容去掉)<br>3.运行软件等待完成<br>4.把JAV_output导入至KODI,EMBY中。<br>详细请看以下教程**<br>
## 1.模块安装 * 若 Windows 用户需要运行源代码版本,请安装 Windows Python 环境:[点击前往](https://www.python.org/downloads/windows/) 选中 executable installer 下载
### MacOS, Linux
* MacOS, Linux 用户请下载源码包运行
* MacOS Python环境开箱即用[可选安装最新版本](https://docs.brew.sh/Homebrew-and-Python)
* Linux Python环境开箱即用可选安装最新版本恕 Linux 版本众多请自行搜索
## 简要教程:
1. 把软件拉到和电影的同一目录
2. 设置 config.ini 文件的代理(路由器拥有自动代理功能的可以把 proxy= 后面内容去掉)
3. 运行软件等待完成
4. 把 JAV_output 导入至 Kodi, Emby, Jellyfin 中。
详细请看以下完整文档
# 完整文档
## 模块安装
If you run the **source** version, install a **Python environment** and the following **modules** before running.
Enter this in cmd/PowerShell/Terminal to install the modules:
```
pip install requests pyquery lxml Beautifulsoup4 pillow
```
## Configure config.ini
### Run mode
```
[common]
main_mode=1
```
1 is normal mode.
2 is organize-only mode: movies are simply renamed to their ID and filed into folders named after the actress, with no metadata scraping.
```
success_output_folder=JAV_output
failed_output_folder=failed
```
Sets the output directories for successful and failed scrapes.
---
#### Soft links
Handy for PT data hoarders who want to keep seeding a download while also scraping it into the library.
```
[common]
soft_link=0
```
1 enables soft-link mode.
0 disables it.
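For intuition, soft-link mode boils down to something like the following sketch (hypothetical paths, not the tool's actual code; note that creating symlinks on Windows may need elevated rights):

```python
import os

src = "/downloads/SSNI-001.mp4"                   # the seeding copy stays put (hypothetical path)
dst = "JAV_output/Actress/SSNI-001/SSNI-001.mp4"  # the organized library entry

os.makedirs(os.path.dirname(dst), exist_ok=True)  # create the target folder tree
os.symlink(os.path.abspath(src), dst)             # link instead of move, so the torrent keeps seeding
```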
---
### Network settings
```
[proxy]
proxy=127.0.0.1:1081
timeout=10
retry=3
```
#### Proxy settings for certain regions
```
proxy=127.0.0.1:1081
```
Open ```config.ini``` and set your local proxy address and port on the ```proxy``` line under ```[proxy]```; the local proxy ports of Shadowxxxx/X and V2XXX are supported.
A Japanese proxy is recommended when scraping the amateur (素人) series.
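For illustration, this is roughly how a proxy value such as 127.0.0.1:1081, together with the timeout= and retry= settings, is typically applied with the requests library (a sketch only; the tool's own get_html() may differ):

```python
import requests

proxy = "127.0.0.1:1081"                 # value of proxy= under [proxy]
proxies = {"http": "http://" + proxy,
           "https": "http://" + proxy}

html = None
for _ in range(3):                       # retry=3
    try:
        r = requests.get("https://www.javbus.com",
                         proxies=proxies, timeout=10)  # timeout=10 seconds
        html = r.text
        break                            # success, stop retrying
    except requests.exceptions.RequestException:
        continue                         # timed out or refused, try again
```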
---
#### Connection timeout
```
timeout=10
```
10 is the timeout before a retry, in seconds.
---
#### Connection retry count
```
retry=3
```
3 is the number of retries.
---
#### Update check switch
```
[update]
update_check=1
```
0 disables the check, 1 enables it; disabling is not recommended.
---
### Media library selection
```
[media]
media_warehouse=emby
#emby plex kodi
```
Choose from emby, plex, kodi.
For Plex, install the ```XBMCnfoMoviesImporter``` plugin.
---
### Excluding specified characters and directories
```
[escape]
literals=\
folders=failed,JAV_output
```
```literals=``` removes the listed characters from titles; e.g. ```literals=\()``` deletes the characters ```\()``` from a title.
```folders=``` names directories to skip; e.g. ```folders=failed,JAV_output``` skips failed and JAV_output when scraping across multiple directories.
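A tiny sketch of what the ```literals=``` setting amounts to (illustrative only, not the tool's actual code):

```python
title = r"SomeTitle\(temp)"
literals = r"\()"                  # the characters listed after literals=
for ch in literals:
    title = title.replace(ch, "") # drop every banned character
print(title)                      # -> SomeTitletemp
```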
---
### Scan directory
```
[movie_location]
path=
```
If ```path=``` is left empty, movies in the program's own directory are scraped.
---
### Debug mode
```
[debug_mode]
switch=1
```
To enable debug mode, add the lines above to ```config.ini``` by hand; when enabled, the scraped metadata is printed during capture.
---
### (Optional) Custom directory and file naming rules
```
[Name_Rule]
location_rule=actor+'/'+number
naming_rule=number+'-'+title
```
Sensible defaults are provided.
---
#### Naming parameters
```
title = movie title
actor = actor
studio = studio
director = director
release = release date
year = release year
number = ID (番号)
cover = cover URL
tag = genre
outline = synopsis
runtime = duration
```
The parameters above are referred to as **variables** below.
#### Example:
A rule is built from two kinds of elements, variables and literal text, and every pair of elements must be joined with a plus sign **+**. For example ```naming_rule='['+number+']-'+title```: text inside quotation marks ' ' is literal text, text outside them is a variable (see the sketch below).
Directory rule: the default is ```location_rule=actor+'/'+number```
**Adding title here is not recommended**: titles can be very long, and the Windows API may then fail when the scraper creates the folder.
File naming rule: the default is ```naming_rule=number+'-'+title```
**The title shown in Emby, Kodi, and other media centers does not affect the movie file's name on disk**, which remains ID + extension.
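To make the rule syntax concrete, here is a hypothetical evaluator (a simplified sketch that assumes literals never contain '+'; the tool's real parser may differ):

```python
# hypothetical metadata for one movie
info = {"actor": "Actress", "number": "ABC-123", "title": "Some Title"}

def render(rule, info):
    # split on '+' and resolve each element: quoted pieces are literal text,
    # bare names are variables looked up in the metadata
    parts = []
    for elem in rule.split('+'):
        elem = elem.strip()
        if elem.startswith("'") and elem.endswith("'"):
            parts.append(elem[1:-1])          # literal text
        else:
            parts.append(info.get(elem, ''))  # variable
    return ''.join(parts)

print(render("actor+'/'+number", info))  # Actress/ABC-123
print(render("number+'-'+title", info))  # ABC-123-Some Title
```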
---
### Update switch
```
[update]
update_check=1
```
1 is on, 0 is off.
## Handling multiple directories
The program can search for movie-file suffixes under a parent directory that holds several movie folders, then move the matches into the program's own directory (see the sketch below).
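A sketch of what that amounts to (hypothetical parent directory and an illustrative, non-exhaustive suffix list):

```python
import os
import shutil

exts = ('.mp4', '.mkv', '.avi', '.webm')   # common movie suffixes (illustrative)
parent = '/downloads'                      # hypothetical parent directory

for root, dirs, files in os.walk(parent):
    for name in files:
        if name.lower().endswith(exts):
            # move the movie next to the program (the current directory)
            shutil.move(os.path.join(root, name), name)
```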
## Multi-part movies
**Merging the parts into a single video file is recommended.**
Otherwise, name the parts with a part-number suffix, e.g. ```ssni-xxx-cd1.mp4, ssni-xxx-cd2.mp4, abp-xxx-CD1.mp4```; any name containing a ```-CDn./-cdn.``` style suffix enables the multi-part feature, as the sketch below shows.
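A minimal sketch of the part-suffix match (illustrative only):

```python
import re

# matches the -CD1. / -cd2. style suffix described above
part = re.compile(r'-[Cc][Dd](\d+)\.')

for name in ('ssni-xxx-cd1.mp4', 'ssni-xxx-cd2.mp4', 'abp-xxx-CD1.mp4'):
    m = part.search(name)
    print(name, '-> part', m.group(1))
```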
## Chinese subtitle handling
Run ```AV_Data_capture.py/.exe```.
When the file name contains 中文, 字幕, -c., or -C., the **Chinese subtitles (中文字幕)** tag is added while the metadata is processed; see the sketch below.
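A tiny illustrative check (not the tool's actual code):

```python
def has_chinese_sub(filename):
    # mirrors the documented triggers: 中文, 字幕, -c., -C.
    marks = ('中文', '字幕', '-c.', '-C.')
    return any(m in filename for m in marks)

print(has_chinese_sub('ABC-123-C.mp4'))  # True
print(has_chinese_sub('ABC-123.mp4'))    # False
```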
## Troubleshooting (important)
### Make sure your copy of the software is complete, and that your ini file matches the one shipped with the download
---
### The program closes immediately after opening
---
### Connection-refused errors
Set up your [proxy](#proxy-settings-for-certain-regions) first.
---
### Nonetype / xpath errors
Same as above.
---
### ID (番号) extraction fails or misbehaves
**Movies that can currently be scraped: titles with metadata on JAVBUS, the amateur series (300Maan, 259luxu, siro, etc.), and the FC2 series**

> The image below comes from Pockies' blog, used with the original author's permission

![](https://raw.githubusercontent.com/Pockies/pic/master/741f9461gy1g1cxc31t41j20i804zdgo.jpg)
The ID-extraction logic is now fairly robust and can pull the ID out of file names like those above. If extraction still fails or misbehaves, rename the file by the following rule (and go easy on feeding the tool piles of obscure no-name rips; give it a fighting chance):
```
COSQ-004.mp4
```
For a **non-standard ID** (野鸡番号), rename the file so it exactly matches the ID shown on the scraping site (file extension aside), then drag the file onto core.exe/.py.
**Non-standard IDs**: IDs such as ```XXX-XXX-1``` or ```1301XX-MINA_YUKA``` that nevertheless exist in databases like javbus.
**Important**: apart from that, a **movie file name** like ```XXXX-XXX-C``` with a trailing -C marks a movie with Chinese subtitles.
Requirement: the file name should contain an underscore or hyphen "_", "-", and ideally nothing beyond the ID itself, which helps the software fetch metadata reliably (a simplified matching sketch follows).
For renaming many files at once, [ReNamer](http://www.den4b.com/products/renamer) can batch-rename them.
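As a rough idea of why such names work, a simplified pattern like the one below already captures them (this is not the tool's real extractor, which handles many more cases):

```python
import re

# letters, a '-' or '_' separator, then digits, e.g. COSQ-004
pattern = re.compile(r'([A-Za-z]+)[-_](\d+)')

m = pattern.search('COSQ-004.mp4')
print(m.group(0))  # COSQ-004
```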
---
### About PIL/image.py
No fix for now; it may be a network problem or a pillow packaging problem. You can run from source instead (with the modules from the installation step above).
### Drag-and-drop method
For particularly odd IDs: put the movie in the same directory as the program and drag it onto ```AV_Data_Capture.exe``` to scrape and organize that single file.
### The software automatically moves successfully scraped movies into the JAV_output folder, sorted by actress; failed movies go to the failed folder.

### Import the JAV_output folder into Emby or Kodi and wait for the metadata refresh to finish

### About Synology NAS
Enable SMB and mount the share as a network drive on Windows, then use the software as usual; the same applies to other NAS brands.

## Afterword
How does it feel to see your Japanese movie collection managed this neatly? Pretty satisfying, right?
**Official Telegram group: [click to join](https://t.me/joinchat/J54y1g3-a7nxJ_-WS4-KFQ)**
avsox.py → SiteSource/avsox.py (Normal file → Executable file)
import re
from lxml import etree
import json
from bs4 import BeautifulSoup
from ADC_function import *
# import sys
# import io
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)

def getActorPhoto(htmlcode):  #//*[@id="star_qdt"]/li/a/img
    soup = BeautifulSoup(htmlcode, 'lxml')
    a = soup.find_all(attrs={'class': 'avatar-box'})
    d = {}
    for i in a:
        l = i.img['src']
        t = i.span.get_text()
        p2 = {t: l}
        d.update(p2)
    return d
def getTitle(a):
    try:
        html = etree.fromstring(a, etree.HTMLParser())
        result = str(html.xpath('/html/body/div[2]/h3/text()')).strip(" ['']")  #[0]
        return result.replace('/', '')
    except:
        return ''
def getActor(a):  #//*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
    soup = BeautifulSoup(a, 'lxml')
    a = soup.find_all(attrs={'class': 'avatar-box'})
    d = []
    for i in a:
        d.append(i.span.get_text())
    return d
def getStudio(a):
    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
    result1 = str(html.xpath('//p[contains(text(),"制作商: ")]/following-sibling::p[1]/a/text()')).strip(" ['']").replace("', '",' ')
    return result1
def getRuntime(a):
    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
    result1 = str(html.xpath('//span[contains(text(),"长度:")]/../text()')).strip(" ['分钟']")
    return result1
def getLabel(a):
    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
    result1 = str(html.xpath('//p[contains(text(),"系列:")]/following-sibling::p[1]/a/text()')).strip(" ['']")
    return result1
def getNum(a):
    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
    result1 = str(html.xpath('//span[contains(text(),"识别码:")]/../span[2]/text()')).strip(" ['']")
    return result1
def getYear(release):
    try:
        result = str(re.search('\d{4}',release).group())
        return result
    except:
        return release
def getRelease(a):
    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
    result1 = str(html.xpath('//span[contains(text(),"发行时间:")]/../text()')).strip(" ['']")
    return result1
def getCover(htmlcode):
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    result = str(html.xpath('/html/body/div[2]/div[1]/div[1]/a/img/@src')).strip(" ['']")
    return result
def getCover_small(htmlcode):
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    result = str(html.xpath('//*[@id="waterfall"]/div/a/div[1]/img/@src')).strip(" ['']")
    return result
def getTag(a):  # fetch genre tags
    soup = BeautifulSoup(a, 'lxml')
    a = soup.find_all(attrs={'class': 'genre'})
    d = []
    for i in a:
        d.append(i.get_text())
    return d

def main(number):
    url = 'https://avsox.host/cn/search/' + number
    a = get_html(url)
    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
    result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
    if result1 == '' or result1 == 'null' or result1 == 'None':
        # retry with '-' replaced by '_', then with the separator dropped entirely
        a = get_html('https://avsox.host/cn/search/' + number.replace('-', '_'))
        print(a)
        html = etree.fromstring(a, etree.HTMLParser())
        result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
        if result1 == '' or result1 == 'null' or result1 == 'None':
            a = get_html('https://avsox.host/cn/search/' + number.replace('_', ''))
            print(a)
            html = etree.fromstring(a, etree.HTMLParser())
            result1 = str(html.xpath('//*[@id="waterfall"]/div/a/@href')).strip(" ['']")
    web = get_html(result1)
    soup = BeautifulSoup(web, 'lxml')
    info = str(soup.find(attrs={'class': 'row movie'}))
    dic = {
        'actor': getActor(web),
        'title': getTitle(web).strip(getNum(web)),
        'studio': getStudio(info),
        'outline': '',
        'runtime': getRuntime(info),
        'director': '',
        'release': getRelease(info),
        'number': getNum(info),
        'cover': getCover(web),
        'cover_small': getCover_small(a),
        'imagecut': 3,
        'tag': getTag(web),
        'label': getLabel(info),
        'year': getYear(getRelease(info)),  # str(re.search('\d{4}',getRelease(a)).group()),
        'actor_photo': getActorPhoto(web),
        'website': result1,
        'source': 'avsox.py',
    }
    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )  # .encode('UTF-8')
    return js
#print(main('012717_472'))
SiteSource/fanza.py (new executable file)
#!/usr/bin/python3
# -*- coding: utf-8 -*-
import json
import re
from lxml import etree
from ADC_function import *
# import sys
# import io
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)

def getTitle(text):
    html = etree.fromstring(text, etree.HTMLParser())
    result = html.xpath('//*[@id="title"]/text()')[0]
    return result

def getActor(text):
    # //*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
    html = etree.fromstring(text, etree.HTMLParser())
    result = (
        str(
            html.xpath(
                "//td[contains(text(),'出演者')]/following-sibling::td/span/a/text()"
            )
        )
        .strip(" ['']")
        .replace("', '", ",")
    )
    return result

def getStudio(text):
    html = etree.fromstring(text, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
    try:
        result = html.xpath(
            "//td[contains(text(),'メーカー')]/following-sibling::td/a/text()"
        )[0]
    except:
        result = html.xpath(
            "//td[contains(text(),'メーカー')]/following-sibling::td/text()"
        )[0]
    return result

def getRuntime(text):
    html = etree.fromstring(text, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
    result = html.xpath("//td[contains(text(),'収録時間')]/following-sibling::td/text()")[0]
    return re.search(r"\d+", str(result)).group()

def getLabel(text):
    html = etree.fromstring(text, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
    try:
        result = html.xpath(
            "//td[contains(text(),'シリーズ:')]/following-sibling::td/a/text()"
        )[0]
    except:
        result = html.xpath(
            "//td[contains(text(),'シリーズ:')]/following-sibling::td/text()"
        )[0]
    return result

def getNum(text):
    html = etree.fromstring(text, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
    try:
        result = html.xpath(
            "//td[contains(text(),'品番:')]/following-sibling::td/a/text()"
        )[0]
    except:
        result = html.xpath(
            "//td[contains(text(),'品番:')]/following-sibling::td/text()"
        )[0]
    return result

def getYear(getRelease):
    try:
        result = str(re.search(r"\d{4}", getRelease).group())
        return result
    except:
        return getRelease

def getRelease(text):
    html = etree.fromstring(text, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
    try:
        result = html.xpath(
            "//td[contains(text(),'発売日:')]/following-sibling::td/a/text()"
        )[0].lstrip("\n")
    except:
        result = html.xpath(
            "//td[contains(text(),'発売日:')]/following-sibling::td/text()"
        )[0].lstrip("\n")
    return result

def getTag(text):
    html = etree.fromstring(text, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
    try:
        result = html.xpath(
            "//td[contains(text(),'ジャンル:')]/following-sibling::td/a/text()"
        )
    except:
        result = html.xpath(
            "//td[contains(text(),'ジャンル:')]/following-sibling::td/text()"
        )
    return result

def getCover(text, number):
    html = etree.fromstring(text, etree.HTMLParser())
    cover_number = number
    try:
        result = html.xpath('//*[@id="' + cover_number + '"]/@href')[0]
    except:
        # sometimes fanza replaces _ with \u005f in the image id
        if "_" in cover_number:
            cover_number = cover_number.replace("_", r"\u005f")
        try:
            result = html.xpath('//*[@id="' + cover_number + '"]/@href')[0]
        except:
            # (TODO) handle more edge cases
            # print(html)
            # raise an exception here, same behavior as before;
            # the main requirement is fetching the picture
            raise ValueError("can not find image")
    return result

def getDirector(text):
    html = etree.fromstring(text, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
    try:
        result = html.xpath(
            "//td[contains(text(),'監督:')]/following-sibling::td/a/text()"
        )[0]
    except:
        result = html.xpath(
            "//td[contains(text(),'監督:')]/following-sibling::td/text()"
        )[0]
    return result

def getOutline(text):
    html = etree.fromstring(text, etree.HTMLParser())
    try:
        result = str(html.xpath("//div[@class='mg-b20 lh4']/text()")[0]).replace(
            "\n", ""
        )
        if result == "":
            result = str(html.xpath("//div[@class='mg-b20 lh4']//p/text()")[0]).replace(
                "\n", ""
            )
    except:
        # (TODO) handle more edge cases
        # print(html)
        return ""
    return result

def main(number):
    # fanza allows letter + number + underscore; normalize the input here
    # @note: the only underscore usage found so far is h_test123456789
    fanza_search_number = number
    # AV_Data_Capture.py getNumber() over-formats the input; restore the h_ prefix here
    if fanza_search_number.startswith("h-"):
        fanza_search_number = fanza_search_number.replace("h-", "h_")
    fanza_search_number = re.sub(r"[^0-9a-zA-Z_]", "", fanza_search_number).lower()
    fanza_urls = [
        "https://www.dmm.co.jp/digital/videoa/-/detail/=/cid=",
        "https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=",
        "https://www.dmm.co.jp/digital/anime/-/detail/=/cid=",
        "https://www.dmm.co.jp/mono/anime/-/detail/=/cid=",
    ]
    chosen_url = ""
    for url in fanza_urls:
        chosen_url = url + fanza_search_number
        htmlcode = get_html(chosen_url)
        if "404 Not Found" not in htmlcode:
            break
    if "404 Not Found" in htmlcode:
        return json.dumps({"title": "",})
    try:
        # for some old pages the input number does not match the page:
        # for example the url is cid=test012
        # but the hinban on the page is test00012,
        # so get the hinban first, then pass it to the functions below
        fanza_hinban = getNum(htmlcode)
        data = {
            "title": getTitle(htmlcode).strip(getActor(htmlcode)),
            "studio": getStudio(htmlcode),
            "outline": getOutline(htmlcode),
            "runtime": getRuntime(htmlcode),
            "director": getDirector(htmlcode) if "anime" not in chosen_url else "",
            "actor": getActor(htmlcode) if "anime" not in chosen_url else "",
            "release": getRelease(htmlcode),
            "number": fanza_hinban,
            "cover": getCover(htmlcode, fanza_hinban),
            "imagecut": 1,
            "tag": getTag(htmlcode),
            "label": getLabel(htmlcode),
            "year": getYear(
                getRelease(htmlcode)
            ),  # str(re.search('\d{4}',getRelease(a)).group()),
            "actor_photo": "",
            "website": chosen_url,
            "source": "fanza.py",
        }
    except:
        data = {
            "title": "",
        }
    js = json.dumps(
        data, ensure_ascii=False, sort_keys=True, indent=4, separators=(",", ":")
    )  # .encode('UTF-8')
    return js

if __name__ == "__main__":
    # print(main("DV-1562"))
    # input("[+][+]Press Enter to exit; you can review any error message before exiting.")
    # print(main("ipx292"))
    pass
SiteSource/fc2fans_club.py (new executable file)
import re
from lxml import etree  # need install
import json
import ADC_function
# import sys
# import io
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)

def getTitle(htmlcode):  # fetch the title
    #print(htmlcode)
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    result = str(html.xpath('/html/body/div[2]/div/div[1]/h3/text()')).strip(" ['']")
    result2 = str(re.sub('\D{2}2-\d+', '', result)).replace(' ', '', 1)
    #print(result2)
    return result2

def getActor(htmlcode):
    try:
        html = etree.fromstring(htmlcode, etree.HTMLParser())
        result = str(html.xpath('/html/body/div[2]/div/div[1]/h5[5]/a/text()')).strip(" ['']")
        return result
    except:
        return ''

def getStudio(htmlcode):  # fetch the studio
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    result = str(html.xpath('/html/body/div[2]/div/div[1]/h5[3]/a[1]/text()')).strip(" ['']")
    return result

def getNum(htmlcode):  # fetch the ID (番号)
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[1]/span[2]/text()')).strip(" ['']")
    #print(result)
    return result

def getRelease(htmlcode2):
    #a=ADC_function.get_html('http://adult.contents.fc2.com/article_search.php?id='+str(number).lstrip("FC2-").lstrip("fc2-").lstrip("fc2_").lstrip("fc2-")+'&utm_source=aff_php&utm_medium=source_code&utm_campaign=from_aff_php')
    html = etree.fromstring(htmlcode2, etree.HTMLParser())
    result = str(html.xpath('//*[@id="container"]/div[1]/div/article/section[1]/div/div[2]/dl/dd[4]/text()')).strip(" ['']")
    return result

def getCover(htmlcode, number, htmlcode2):  # fetch the cover URL
    #a = ADC_function.get_html('http://adult.contents.fc2.com/article_search.php?id=' + str(number).lstrip("FC2-").lstrip("fc2-").lstrip("fc2_").lstrip("fc2-") + '&utm_source=aff_php&utm_medium=source_code&utm_campaign=from_aff_php')
    html = etree.fromstring(htmlcode2, etree.HTMLParser())
    result = str(html.xpath('//*[@id="container"]/div[1]/div/article/section[1]/div/div[1]/a/img/@src')).strip(" ['']")
    if result == '':
        html = etree.fromstring(htmlcode, etree.HTMLParser())
        result2 = str(html.xpath('//*[@id="slider"]/ul[1]/li[1]/img/@src')).strip(" ['']")
        return 'https://fc2club.com' + result2
    return 'http:' + result

def getOutline(htmlcode2):  # fetch the synopsis
    html = etree.fromstring(htmlcode2, etree.HTMLParser())
    result = str(html.xpath('/html/body/div[1]/div[2]/div[2]/div[1]/div/article/section[4]/p/text()')).strip(" ['']").replace("\\n",'',10000).replace("'",'',10000).replace(', ,','').strip(' ').replace('。,',',')
    return result

def getTag(htmlcode):  # fetch genre tags
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    result = str(html.xpath('/html/body/div[2]/div/div[1]/h5[4]/a/text()'))
    return result.strip(" ['']").replace("'",'').replace(' ','')

def getYear(release):
    try:
        result = re.search('\d{4}',release).group()
        return result
    except:
        return ''

def getTitle_fc2com(htmlcode):  # fetch the title from fc2.com
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    result = html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/h3/text()')[0]
    return result

def getActor_fc2com(htmlcode):
    try:
        html = etree.fromstring(htmlcode, etree.HTMLParser())
        result = html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/ul/li[3]/a/text()')[0]
        return result
    except:
        return ''

def getStudio_fc2com(htmlcode):  # fetch the studio from fc2.com
    try:
        html = etree.fromstring(htmlcode, etree.HTMLParser())
        result = str(html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[2]/ul/li[3]/a/text()')).strip(" ['']")
        return result
    except:
        return ''

def getNum_fc2com(htmlcode):  # fetch the ID from fc2.com
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[1]/span[2]/text()')).strip(" ['']")
    return result

def getRelease_fc2com(htmlcode2):
    html = etree.fromstring(htmlcode2, etree.HTMLParser())
    result = str(html.xpath('//*[@id="container"]/div[1]/div/article/section[1]/div/div[2]/dl/dd[4]/text()')).strip(" ['']")
    return result

def getCover_fc2com(htmlcode2):  # fetch the cover URL from fc2.com
    html = etree.fromstring(htmlcode2, etree.HTMLParser())
    result = str(html.xpath('//*[@id="top"]/div[1]/section[1]/div/section/div[1]/span/img/@src')).strip(" ['']")
    return 'http:' + result

def getOutline_fc2com(htmlcode2):  # fetch the synopsis from fc2.com
    html = etree.fromstring(htmlcode2, etree.HTMLParser())
    result = str(html.xpath('/html/body/div/text()')).strip(" ['']").replace("\\n",'',10000).replace("'",'',10000).replace(', ,','').strip(' ').replace('。,',',')
    return result

def getTag_fc2com(number):  # fetch genre tags from the fc2.com API
    htmlcode = str(bytes(ADC_function.get_html('http://adult.contents.fc2.com/api/v4/article/'+number+'/tag?'),'utf-8').decode('unicode-escape'))
    result = re.findall('"tag":"(.*?)"', htmlcode)
    return result

def getYear_fc2com(release):
    try:
        result = re.search('\d{4}',release).group()
        return result
    except:
        return ''

def main(number):
    try:
        htmlcode2 = ADC_function.get_html('https://adult.contents.fc2.com/article/'+number+'/')
        htmlcode = ADC_function.get_html('https://fc2club.com//html/FC2-' + number + '.html')
        actor = getActor(htmlcode)
        if getActor(htmlcode) == '':
            actor = 'FC2系列'
        dic = {
            'title': getTitle(htmlcode),
            'studio': getStudio(htmlcode),
            'year': '',  # str(re.search('\d{4}',getRelease(number)).group()),
            'outline': '',  # getOutline(htmlcode2),
            'runtime': getYear(getRelease(htmlcode)),
            'director': getStudio(htmlcode),
            'actor': actor,
            'release': getRelease(htmlcode2),  # pass the page HTML, not the bare number
            'number': 'FC2-'+number,
            'label': '',
            'cover': getCover(htmlcode, number, htmlcode2),
            'imagecut': 0,
            'tag': getTag(htmlcode),
            'actor_photo': '',
            'website': 'https://fc2club.com//html/FC2-' + number + '.html',
            'source': 'https://fc2club.com//html/FC2-' + number + '.html',
        }
        if dic['title'] == '':
            htmlcode2 = ADC_function.get_html('https://adult.contents.fc2.com/article/' + number + '/', cookies={'wei6H': '1'})
            actor = getActor(htmlcode)
            if getActor(htmlcode) == '':
                actor = 'FC2系列'
            dic = {
                'title': getTitle_fc2com(htmlcode2),
                'studio': getStudio_fc2com(htmlcode2),
                'year': '',  # str(re.search('\d{4}',getRelease(number)).group()),
                'outline': getOutline_fc2com(htmlcode2),
                'runtime': getYear_fc2com(getRelease(htmlcode2)),
                'director': getStudio_fc2com(htmlcode2),
                'actor': actor,
                'release': getRelease_fc2com(htmlcode2),  # pass the page HTML, not the bare number
                'number': 'FC2-' + number,
                'cover': getCover_fc2com(htmlcode2),
                'imagecut': 0,
                'tag': getTag_fc2com(number),
                'label': '',
                'actor_photo': '',
                'website': 'http://adult.contents.fc2.com/article/' + number + '/',
                'source': 'http://adult.contents.fc2.com/article/' + number + '/',
            }
    except Exception as e:
        # (TODO) handle this better
        # print(e)
        dic = {"title": ""}
    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'),)  # .encode('UTF-8')
    return js
#print(main('1252953'))
javbus.py
import re
from pyquery import PyQuery as pq  # need install
from lxml import etree  # need install
from bs4 import BeautifulSoup  # need install
import json
from ADC_function import *

def getActorPhoto(htmlcode):  #//*[@id="star_qdt"]/li/a/img
    soup = BeautifulSoup(htmlcode, 'lxml')
    a = soup.find_all(attrs={'class': 'star-name'})
    d = {}
    for i in a:
        l = i.a['href']
        t = i.get_text()
        html = etree.fromstring(get_html(l), etree.HTMLParser())
        p = str(html.xpath('//*[@id="waterfall"]/div[1]/div/div[1]/img/@src')).strip(" ['']")
        p2 = {t: p}
        d.update(p2)
    return d
def getTitle(htmlcode):  # fetch the title
    doc = pq(htmlcode)
    title = str(doc('div.container h3').text()).replace(' ', '-')
    try:
        title2 = re.sub('n\d+-', '', title)
        return title2
    except:
        return title
def getStudio(htmlcode):  # fetch the studio
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[5]/a/text()')).strip(" ['']")
    return result
def getYear(htmlcode):  # fetch the year
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[2]/text()')).strip(" ['']")
    return result
def getCover(htmlcode):  # fetch the cover URL
    doc = pq(htmlcode)
    image = doc('a.bigImage')
    return image.attr('href')
def getRelease(htmlcode):  # fetch the release date
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[2]/text()')).strip(" ['']")
    return result
def getRuntime(htmlcode):  # fetch the runtime in minutes
    soup = BeautifulSoup(htmlcode, 'lxml')
    a = soup.find(text=re.compile('分鐘'))
    return a
def getActor(htmlcode):  # fetch the actresses
    b = []
    soup = BeautifulSoup(htmlcode, 'lxml')
    a = soup.find_all(attrs={'class': 'star-name'})
    for i in a:
        b.append(i.get_text())
    return b
def getNum(htmlcode):  # fetch the ID (番号)
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[1]/span[2]/text()')).strip(" ['']")
    return result
def getDirector(htmlcode):  # fetch the director
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[4]/a/text()')).strip(" ['']")
    return result
def getOutline(htmlcode):  # fetch the synopsis
    doc = pq(htmlcode)
    result = str(doc('tr td div.mg-b20.lh4 p.mg-b20').text())
    return result
def getSerise(htmlcode):  # fetch the series/label
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[7]/a/text()')).strip(" ['']")
    return result
def getTag(htmlcode):  # fetch genre tags
    tag = []
    soup = BeautifulSoup(htmlcode, 'lxml')
    a = soup.find_all(attrs={'class': 'genre'})
    for i in a:
        if 'onmouseout' in str(i):
            continue
        tag.append(i.get_text())
    return tag
def main(number):
    try:
        htmlcode = get_html('https://www.javbus.com/' + number)
        try:
            dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
        except:
            dww_htmlcode = ''
        dic = {
            'title': str(re.sub('\w+-\d+-', '', getTitle(htmlcode))),
            'studio': getStudio(htmlcode),
            'year': str(re.search('\d{4}', getYear(htmlcode)).group()),
            'outline': getOutline(dww_htmlcode),
            'runtime': getRuntime(htmlcode),
            'director': getDirector(htmlcode),
            'actor': getActor(htmlcode),
            'release': getRelease(htmlcode),
            'number': getNum(htmlcode),
            'cover': getCover(htmlcode),
            'imagecut': 1,
            'tag': getTag(htmlcode),
            'label': getSerise(htmlcode),
            'actor_photo': getActorPhoto(htmlcode),
            'website': 'https://www.javbus.com/' + number,
            'source' : 'javbus.py',
        }
        js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )  # .encode('UTF-8')
        return js
    except:
        return main_uncensored(number)

def main_uncensored(number):  # uncensored (无码) titles
    htmlcode = get_html('https://www.javbus.com/' + number)
    dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
    if getTitle(htmlcode) == '':
        htmlcode = get_html('https://www.javbus.com/' + number.replace('-', '_'))
        dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
    dic = {
        'title': str(re.sub('\w+-\d+-', '', getTitle(htmlcode))).replace(getNum(htmlcode)+'-', ''),
        'studio': getStudio(htmlcode),
        'year': getYear(htmlcode),
        'outline': getOutline(dww_htmlcode),
        'runtime': getRuntime(htmlcode),
        'director': getDirector(htmlcode),
        'actor': getActor(htmlcode),
        'release': getRelease(htmlcode),
        'number': getNum(htmlcode),
        'cover': getCover(htmlcode),
        'tag': getTag(htmlcode),
        'label': getSerise(htmlcode),
        'imagecut': 0,
        'actor_photo': '',
        'website': 'https://www.javbus.com/' + number,
        'source': 'javbus.py',
    }
    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )  # .encode('UTF-8')
    return js
javdb.py
import re
from lxml import etree
import json
from bs4 import BeautifulSoup
from ADC_function import *
# import sys
# import io
# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)

def getTitle(a):
    html = etree.fromstring(a, etree.HTMLParser())
    result = html.xpath("/html/body/section/div/h2/strong/text()")[0]
    return result
def getActor(a):  # //*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
    result1 = str(html.xpath('//strong[contains(text(),"演員")]/../following-sibling::span/text()')).strip(" ['']")
    result2 = str(html.xpath('//strong[contains(text(),"演員")]/../following-sibling::span/a/text()')).strip(" ['']")
    return str(result1 + result2).strip('+').replace(",\\xa0", "").replace("'", "").replace(' ', '').replace(',,', '').lstrip(',').replace(',', ', ')
def getActorPhoto(actor):  #//*[@id="star_qdt"]/li/a/img
    a = actor.split(',')
    d = {}
    for i in a:
        p = {i: ''}
        d.update(p)
    return d
def getStudio(a):
    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
    result1 = str(html.xpath('//strong[contains(text(),"片商")]/../following-sibling::span/text()')).strip(" ['']")
    result2 = str(html.xpath('//strong[contains(text(),"片商")]/../following-sibling::span/a/text()')).strip(" ['']")
    return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
def getRuntime(a):
    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
    result1 = str(html.xpath('//strong[contains(text(),"時長")]/../following-sibling::span/text()')).strip(" ['']")
    result2 = str(html.xpath('//strong[contains(text(),"時長")]/../following-sibling::span/a/text()')).strip(" ['']")
    return str(result1 + result2).strip('+').rstrip('mi')
def getLabel(a):
    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
    result1 = str(html.xpath('//strong[contains(text(),"系列")]/../following-sibling::span/text()')).strip(" ['']")
    result2 = str(html.xpath('//strong[contains(text(),"系列")]/../following-sibling::span/a/text()')).strip(" ['']")
    return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
def getNum(a):
    html = etree.fromstring(a, etree.HTMLParser())
    result1 = str(html.xpath('//strong[contains(text(),"番號")]/../following-sibling::span/text()')).strip(" ['']")
    result2 = str(html.xpath('//strong[contains(text(),"番號")]/../following-sibling::span/a/text()')).strip(" ['']")
    return str(result2 + result1).strip('+')
def getYear(getRelease):
    try:
        result = str(re.search('\d{4}', getRelease).group())
        return result
    except:
        return getRelease
def getRelease(a):
    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
    result1 = str(html.xpath('//strong[contains(text(),"時間")]/../following-sibling::span/text()')).strip(" ['']")
    result2 = str(html.xpath('//strong[contains(text(),"時間")]/../following-sibling::span/a/text()')).strip(" ['']")
    return str(result1 + result2).strip('+')
def getTag(a):
    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
    result1 = str(html.xpath('//strong[contains(text(),"类别")]/../following-sibling::span/text()')).strip(" ['']")
    result2 = str(html.xpath('//strong[contains(text(),"类别")]/../following-sibling::span/a/text()')).strip(" ['']")
    return str(result1 + result2).strip('+').replace(",\\xa0", "").replace("'", "").replace(' ', '').replace(',,', '').lstrip(',')
def getCover_small(a, index=0):
    # same issue as mentioned below:
    # javdb sometimes returns multiple results,
    # so do NOT just take the first one; use the entry at the correct index
    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
    result = html.xpath("//div[@class='item-image fix-scale-cover']/img/@src")[index]
    if 'https' not in result:
        result = 'https:' + result
    return result
def getCover(htmlcode):
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    result = str(html.xpath("//div[@class='column column-video-cover']/a/img/@src")).strip(" ['']")
    return result
def getDirector(a):
    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
    result1 = str(html.xpath('//strong[contains(text(),"導演")]/../following-sibling::span/text()')).strip(" ['']")
    result2 = str(html.xpath('//strong[contains(text(),"導演")]/../following-sibling::span/a/text()')).strip(" ['']")
    return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
def getOutline(htmlcode):
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    result = str(html.xpath('//*[@id="introduction"]/dd/p[1]/text()')).strip(" ['']")
    return result
def main(number):
    try:
        number = number.upper()
        query_result = get_html('https://javdb.com/search?q=' + number + '&f=all')
        html = etree.fromstring(query_result, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
        # javdb sometimes returns multiple results,
        # and the first element may not be the one we are looking for;
        # iterate over all candidates and pick the matching one
        urls = html.xpath('//*[@id="videos"]/div/div/a/@href')
        ids = html.xpath('//*[@id="videos"]/div/div/a/div[contains(@class, "uid")]/text()')
        correct_url = urls[ids.index(number)]
        detail_page = get_html('https://javdb.com' + correct_url)
        dic = {
            'actor': getActor(detail_page),
            'title': getTitle(detail_page),
            'studio': getStudio(detail_page),
            'outline': getOutline(detail_page),
            'runtime': getRuntime(detail_page),
            'director': getDirector(detail_page),
            'release': getRelease(detail_page),
            'number': getNum(detail_page),
            'cover': getCover(detail_page),
            'cover_small': getCover_small(query_result, index=ids.index(number)),
            'imagecut': 3,
            'tag': getTag(detail_page),
            'label': getLabel(detail_page),
            'year': getYear(getRelease(detail_page)),  # str(re.search('\d{4}',getRelease(a)).group()),
            'actor_photo': getActorPhoto(getActor(detail_page)),
            'website': 'https://javdb.com' + correct_url,
            'source': 'javdb.py',
        }
    except Exception as e:
        # print(e)
        dic = {"title": ""}
    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )  # .encode('UTF-8')
    return js

# main('DV-1562')
# input("[+][+]Press Enter to exit; you can review any error message before exiting.")
#print(main('ipx-292'))
mgstage.py
import re import re
from lxml import etree from lxml import etree
import json import json
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from ADC_function import * from ADC_function import *
# import sys
def getTitle(a): # import io
try: # sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
html = etree.fromstring(a, etree.HTMLParser())
result = str(html.xpath('//*[@id="center_column"]/div[1]/h1/text()')).strip(" ['']") def getTitle(a):
return result.replace('/', ',') try:
except: html = etree.fromstring(a, etree.HTMLParser())
return '' result = str(html.xpath('//*[@id="center_column"]/div[1]/h1/text()')).strip(" ['']")
def getActor(a): #//*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text() return result.replace('/', ',')
html = etree.fromstring(a, etree.HTMLParser()) #//table/tr[1]/td[1]/text() except:
result1=str(html.xpath('//th[contains(text(),"出演:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n') return ''
result2=str(html.xpath('//th[contains(text(),"出演:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n') def getActor(a): #//*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
return str(result1+result2).strip('+').replace("', '",'').replace('"','').replace('/',',') html = etree.fromstring(a, etree.HTMLParser()) #//table/tr[1]/td[1]/text()
def getStudio(a): result1=str(html.xpath('//th[contains(text(),"出演:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
html = etree.fromstring(a, etree.HTMLParser()) #//table/tr[1]/td[1]/text() result2=str(html.xpath('//th[contains(text(),"出演:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
result1=str(html.xpath('//th[contains(text(),"シリーズ:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n') return str(result1+result2).strip('+').replace("', '",'').replace('"','').replace('/',',')
result2=str(html.xpath('//th[contains(text(),"シリーズ:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n') def getStudio(a):
return str(result1+result2).strip('+').replace("', '",'').replace('"','') html = etree.fromstring(a, etree.HTMLParser()) #//table/tr[1]/td[1]/text()
def getRuntime(a): result1=str(html.xpath('//th[contains(text(),"シリーズ:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() result2=str(html.xpath('//th[contains(text(),"シリーズ:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
result1 = str(html.xpath('//th[contains(text(),"収録時間:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n') return str(result1+result2).strip('+').replace("', '",'').replace('"','')
result2 = str(html.xpath('//th[contains(text(),"収録時間:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n') def getRuntime(a):
return str(result1 + result2).strip('+').rstrip('mi') html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text()
def getLabel(a): result1 = str(html.xpath('//th[contains(text(),"収録時間:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
html = etree.fromstring(a, etree.HTMLParser()) # //table/tr[1]/td[1]/text() result2 = str(html.xpath('//th[contains(text(),"収録時間:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
    return str(result1 + result2).strip('+').rstrip('mi')
def getLabel(a):
    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
    result1 = str(html.xpath('//th[contains(text(),"シリーズ:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
    result2 = str(html.xpath('//th[contains(text(),"シリーズ:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
    return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
def getNum(a):
    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
    result1 = str(html.xpath('//th[contains(text(),"品番:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
    result2 = str(html.xpath('//th[contains(text(),"品番:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
    return str(result1 + result2).strip('+')
def getYear(getRelease):
    try:
        result = str(re.search('\d{4}', getRelease).group())
        return result
    except:
        return getRelease
def getRelease(a):
    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
    result1 = str(html.xpath('//th[contains(text(),"配信開始日:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
    result2 = str(html.xpath('//th[contains(text(),"配信開始日:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
    return str(result1 + result2).strip('+')
def getTag(a):
    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
    result1 = str(html.xpath('//th[contains(text(),"ジャンル:")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
    result2 = str(html.xpath('//th[contains(text(),"ジャンル:")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
    return str(result1 + result2).strip('+').replace("', '\\n", ",").replace("', '", "").replace('"', '')
def getCover(htmlcode):
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    result = str(html.xpath('//*[@id="center_column"]/div[1]/div[1]/div/div/h2/img/@src')).strip(" ['']")
    # /html/body/div[2]/article[2]/div[1]/div[1]/div/div/h2/img/@src
    return result
def getDirector(a):
    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
    result1 = str(html.xpath('//th[contains(text(),"シリーズ")]/../td/a/text()')).strip(" ['']").strip('\\n ').strip('\\n')
    result2 = str(html.xpath('//th[contains(text(),"シリーズ")]/../td/text()')).strip(" ['']").strip('\\n ').strip('\\n')
    return str(result1 + result2).strip('+').replace("', '", '').replace('"', '')
def getOutline(htmlcode):
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    result = str(html.xpath('//*[@id="introduction"]/dd/p[1]/text()')).strip(" ['']")
    return result
def main(number2):
    number = number2.upper()
    htmlcode = str(get_html('https://www.mgstage.com/product/product_detail/' + str(number) + '/', cookies={'adc': '1'}))
    soup = BeautifulSoup(htmlcode, 'lxml')
    a = str(soup.find(attrs={'class': 'detail_data'})).replace('\n    ', '').replace('    ', '').replace('\n  ', '')
    dic = {
        'title': getTitle(htmlcode).replace("\\n", '').replace('        ', ''),
        'studio': getStudio(a),
        'outline': getOutline(htmlcode),
        'runtime': getRuntime(a),
        'director': getDirector(a),
        'actor': getActor(a),
        'release': getRelease(a),
        'number': getNum(a),
        'cover': getCover(htmlcode),
        'imagecut': 0,
        'tag': getTag(a),
        'label': getLabel(a),
        'year': getYear(getRelease(a)),  # str(re.search('\d{4}',getRelease(a)).group()),
        'actor_photo': '',
        'website': 'https://www.mgstage.com/product/product_detail/' + str(number) + '/',
        'source': 'mgstage.py',
    }
    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'))  # .encode('UTF-8')
    return js

#print(main('SIRO-3607'))
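
Every getXxx helper above repeats the same pattern: parse the page with lxml, read the <td> beside a labelled <th>, then strip list-repr noise by hand. A minimal sketch of that pattern factored into one helper; the name get_table_field and its joining behaviour are illustrative, not part of the original file:

from lxml import etree

def get_table_field(html_text, label):
    """Return the text of the <td> next to the <th> containing `label` (illustrative)."""
    html = etree.fromstring(html_text, etree.HTMLParser())
    # Values may be plain text or wrapped in <a> tags, so collect both.
    linked = html.xpath('//th[contains(text(),"%s")]/../td/a/text()' % label)
    plain = html.xpath('//th[contains(text(),"%s")]/../td/text()' % label)
    return ''.join(s.strip() for s in linked + plain)

# e.g. get_table_field(htmlcode, '品番:') would stand in for getNum(a),
# and get_table_field(htmlcode, '配信開始日:') for getRelease(a).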

41
TestPathNFO.txt Normal file

@@ -0,0 +1,41 @@
/Volumes/Adult/Files/ノ瀬アメリ/Tokyo Hot N0646.avi
/Volumes/Adult/Files/ノ瀬アメリ/MKBD_S03-MaRieS.mp4
/Volumes/192.168.2.100/Adult/Files/Aki Sasaki Megapack/HODV-21299.mkv
/Volumes/Adult/Files/[Tokyo-Hot] [n1180] 美人秘書3穴串刺奉仕残業 (中井綾香 Ayaka Nakai)/(Tokyo-Hot)(n1180)美人秘書3穴串刺奉仕残業 中井綾香.mp4
/mcdv47.avi
/mcdv-47.avi
/mcdv-047.mp4
/mcdv047.mp4
/mcdv0047.mp4
/1pondo-070409_621.mp4
/Volumes/Adult/Files/Kirara Asuka (@明日花キララ) FHD Pack Vol#1(181222)@RUNBKK/No-Watermarked/HOBD00015.FHD2.wmv
/Volumes/Adult/Files/(1.18TB) Julia movie pack collection Part 1/720p/RBD-406_1.mp4
/Volumes/Adult/Files/(1.18TB) Julia movie pack collection Part 1/720p/MDYD-664B.mp4
/Volumes/Adult/Files/107NTTR-037A.mp4
/Volumes/Adult/Files/Yua.Mikami-PML/SNIS-986 国民的アイドル アドレナリン大爆発禁欲1ヶ月後の性欲剥き出し焦らされトランスFUCK 三上悠亜【桃花族】.mp4
/Volumes/Adult/Files/(1.18TB) Julia movie pack collection Part 2/FHD/UPSM-109_2.mkv
/Volumes/Adult/Files/Kirara Asuka (@明日花キララ) FHD Pack Vol#2(181231)@RUNBKK/No-Watermarked/PPT003.SD3.wmv
/Volumes/Adult/Files/波多野结衣/THE波多野結衣 ぶっかけ50連発 CD1.wmv
/Volumes/Adult/Files/波多野结衣/欲しがり 後編 波多野結衣.wmv
/Volumes/Adult/Files/波多野结衣/欲しがり 前編 波多野結衣.wmv
/Volumes/Adult/Files/波多野结衣/加勒比 062212-055 夫の目の前で妻が ~元上司に縛られて~波多野結衣~.rmvb
/Volumes/Adult/Files/波多野结衣/022213-271-carib-whole_s.mp4
/Volumes/Adult/Files/SKYHD-001~010/SKYHD-009_H265.mkv
/Volumes/Adult/Files/大桥步兵合集/LAFBD-41.LaForet.Girl.41.angel.and.devil.Miku.Ohashi.2015.Bluray.1080p.x264.ac3-MTeam.mkv
/Volumes/Adult/Files/大桥步兵合集/032015_161-caribpr-high.mp4
/Volumes/Adult/Files/桃谷绘里香(桃谷エリカ) 所有作品集合/118ppt00016hhb2.mkv
/Volumes/Adult/Files/tia/soe935C.HD.wmv
/Volumes/Adult/Files/SKYHD-011~020/SKYHD-020_H265.mkv
/Volumes/Adult/Files/RION(りおん).Utsunomiya.Shion.宇都宮しをん(うつのみやしをん)/VR/sivr00008_E.mp4
/Volumes/Adult/Files/RION(りおん).Utsunomiya.Shion.宇都宮しをん(うつのみやしをん)/DMM.Video/onsd00899hhb3.mp4
/Volumes/Adult/Files/Rating Top 30 JAV pack/SHKD-744 営業課長の湿ったパンスト 里美ゆりあ.mp4
/Volumes/Adult/Files/Rating Top 30 JAV pack/ABP-627 裏・鈴村あいり-鈴村あいりのオトナの激情SEX4本番 鈴村あいり.MP4
/Volumes/Adult/Files/Rating Top 30 JAV pack/20 ABP-408 上原瑞穂/上原瑞穂 ABP-408 无码流出片段/[ThZu.Cc]20150909164411.m2ts
/Volumes/Adult/Files/Caribbean-101717-520-HD/100917-515/100917-515-carib-1080p.mp4
/Volumes/Adult/Files/ノ瀬アメリ/20081105栗栖エリカ - Sky Angel Blue 10 天舞超絕美少女天使降臨(skyhd010)(中文字幕).avi
/Volumes/Adult/Files/ノ瀬アメリ/一ノ瀬アメリ~加勒比 VERY SEXY.wmv
/Volumes/Adult/Files/ノ瀬アメリ/20101202一瀬アメリ - 東京ブルドック05(inu006).avi
/Volumes/Adult/Files/ノ瀬アメリ/Sky Angel Vol 80 - CD2.mp4
/Volumes/Adult/Files/Mika Sumire すみれ美香/Caribbean-091818-755.mp4
/Volumes/Adult/Files/Takizawa Rola/[HD]abp-031C.wmv
/Volumes/Adult/Files/Takizawa Rola/ABP-013HDA.wmv

51
TestPathSpecial.txt Normal file

@@ -0,0 +1,51 @@
/Volumes/192.168.2.100/Adult/Files/Aki Sasaki Megapack/HODV-21222.mkv
/Volumes/Adult/Files/ノ瀬アメリ/Tokyo Hot N0646.avi
/Volumes/Adult/Files/ノ瀬アメリ/MKBD_S03-MaRieS.mp4
/Volumes/192.168.2.100/Adult/Files/RION(りおん).Utsunomiya.Shion.宇都宮しをん(うつのみやしをん)/DMM.Video/onsd00899hhb3.mp4
/Volumes/192.168.2.100/Adult/Files/Rating Top 30 JAV pack/IPTD-999-1 彼女の姉貴とイケナイ関係 Rio.wmv
/Volumes/192.168.2.100/Adult/Files/Rating Top 30 JAV pack/IPTD-999A 彼女の姉貴とイケナイ関係 Rio.wmv
/Volumes/192.168.2.100/Adult/Files/Rating Top 30 JAV pack/IPTD-999-A 彼女の姉貴とイケナイ関係 Rio.wmv
/Volumes/192.168.2.100/Adult/Files/Rating Top 30 JAV pack/IPTD-999-C 彼女の姉貴とイケナイ関係 Rio.wmv
/Volumes/192.168.2.100/Adult/Files/Rating Top 30 JAV pack/IPTD-999-B 彼女の姉貴とイケナイ関係 Rio.wmv
/Volumes/192.168.2.100/Adult/Files/tia/soe935C.HD.wmv
/Volumes/192.168.2.100/Adult/Files/tia/soe935B.HD.wmv
/Volumes/192.168.2.100/Adult/Files/tia/soe935A.HD.wmv
/Volumes/192.168.2.100/Adult/Files/tia/soe935D.HD.wmv
/Volumes/Adult/Files/大桥步兵合集/LAFBD-41.LaForet.Girl.41.angel.and.devil.Miku.Ohashi.2015.Bluray.1080p.x264.ac3-MTeam.mkv
/Volumes/Adult/Files/[Tokyo-Hot] [n1180] 美人秘書3穴串刺奉仕残業 (中井綾香 Ayaka Nakai)/(Tokyo-Hot)(n1180)美人秘書3穴串刺奉仕残業 中井綾香.mp4
/mcdv47.avi
/mcdv-47.avi
/mcdv-047.mp4
/mcdv047.mp4
/mcdv0047.mp4
/1pondo-070409_621.mp4
/Volumes/Adult/Files/Kirara Asuka (@明日花キララ) FHD Pack Vol#1(181222)@RUNBKK/No-Watermarked/HOBD00015.FHD2.wmv
/Volumes/Adult/Files/(1.18TB) Julia movie pack collection Part 1/720p/RBD-406_1.mp4
/Volumes/Adult/Files/(1.18TB) Julia movie pack collection Part 1/720p/MDYD-664B.mp4
/Volumes/Adult/Files/107NTTR-037A.mp4
/Volumes/Adult/Files/Yua.Mikami-PML/SNIS-986 国民的アイドル アドレナリン大爆発禁欲1ヶ月後の性欲剥き出し焦らされトランスFUCK 三上悠亜【桃花族】.mp4
/Volumes/Adult/Files/(1.18TB) Julia movie pack collection Part 2/FHD/UPSM-109_2.mkv
/Volumes/Adult/Files/Kirara Asuka (@明日花キララ) FHD Pack Vol#2(181231)@RUNBKK/No-Watermarked/PPT003.SD3.wmv
/Volumes/Adult/Files/波多野结衣/THE波多野結衣 ぶっかけ50連発 CD1.wmv
/Volumes/Adult/Files/波多野结衣/欲しがり 後編 波多野結衣.wmv
/Volumes/Adult/Files/波多野结衣/欲しがり 前編 波多野結衣.wmv
/Volumes/Adult/Files/波多野结衣/加勒比 062212-055 夫の目の前で妻が ~元上司に縛られて~波多野結衣~.rmvb
/Volumes/Adult/Files/波多野结衣/022213-271-carib-whole_s.mp4
/Volumes/Adult/Files/SKYHD-001~010/SKYHD-009_H265.mkv
/Volumes/Adult/Files/大桥步兵合集/LAFBD-41.LaForet.Girl.41.angel.and.devil.Miku.Ohashi.2015.Bluray.1080p.x264.ac3-MTeam.mkv
/Volumes/Adult/Files/大桥步兵合集/032015_161-caribpr-high.mp4
/Volumes/Adult/Files/桃谷绘里香(桃谷エリカ) 所有作品集合/118ppt00016hhb2.mkv
/Volumes/Adult/Files/SKYHD-011~020/SKYHD-020_H265.mkv
/Volumes/Adult/Files/RION(りおん).Utsunomiya.Shion.宇都宮しをん(うつのみやしをん)/VR/sivr00008_E.mp4
/Volumes/Adult/Files/RION(りおん).Utsunomiya.Shion.宇都宮しをん(うつのみやしをん)/DMM.Video/onsd00899hhb3.mp4
/Volumes/Adult/Files/Rating Top 30 JAV pack/SHKD-744 営業課長の湿ったパンスト 里美ゆりあ.mp4
/Volumes/Adult/Files/Rating Top 30 JAV pack/ABP-627 裏・鈴村あいり-鈴村あいりのオトナの激情SEX4本番 鈴村あいり.MP4
/Volumes/Adult/Files/Rating Top 30 JAV pack/20 ABP-408 上原瑞穂/上原瑞穂 ABP-408 无码流出片段/[ThZu.Cc]20150909164411.m2ts
/Volumes/Adult/Files/Caribbean-101717-520-HD/100917-515/100917-515-carib-1080p.mp4
/Volumes/Adult/Files/ノ瀬アメリ/20081105栗栖エリカ - Sky Angel Blue 10 天舞超絕美少女天使降臨(skyhd010)(中文字幕).avi
/Volumes/Adult/Files/ノ瀬アメリ/一ノ瀬アメリ~加勒比 VERY SEXY.wmv
/Volumes/Adult/Files/ノ瀬アメリ/20101202一瀬アメリ - 東京ブルドック05(inu006).avi
/Volumes/Adult/Files/ノ瀬アメリ/Sky Angel Vol 80 - CD2.mp4
/Volumes/Adult/Files/Mika Sumire すみれ美香/Caribbean-091818-755.mp4
/Volumes/Adult/Files/Takizawa Rola/[HD]abp-031C.wmv
/Volumes/Adult/Files/Takizawa Rola/ABP-013HDA.wmv

50
TestPaths.txt Normal file

@@ -0,0 +1,50 @@
/Volumes/Adult/Files/Kirara Asuka (@明日花キララ) FHD Pack Vol#1(181222)@RUNBKK/No-Watermarked/HOBD00015.FHD2.wmv
/1pondo-070409_621.mp4
/Volumes/Adult/Files/107NTTR-037.mp4
/Volumes/Adult/Files/107NTTR-037A.mp4
/Volumes/Adult/Files/Yua.Mikami-PML/TEK-097 ふたりは無敵.wmv
/Volumes/Adult/Files/Yua.Mikami-PML/SNIS-986 国民的アイドル アドレナリン大爆発禁欲1ヶ月後の性欲剥き出し焦らされトランスFUCK 三上悠亜【桃花族】.mp4
/Volumes/Adult/Files/Yua.Mikami-PML/SSNI-030 三上悠亜ファン感謝祭 国民的アイドル×一般ユーザー20人ガチファンとSEX解禁ハメまくりスペシャル【桃花族】.mp4
/Volumes/Adult/Files/(1.18TB) Julia movie pack collection Part 2/FHD/MIDD-893A.mkv
/Volumes/Adult/Files/(1.18TB) Julia movie pack collection Part 2/FHD/UPSM-109_2.mkv
/Volumes/Adult/Files/Kirara Asuka (@明日花キララ) FHD Pack Vol#2(181231)@RUNBKK/No-Watermarked/PPT003.SD3.wmv
/Volumes/Adult/Files/波多野结衣/THE波多野結衣 ぶっかけ50連発 CD1.wmv
/Volumes/Adult/Files/波多野结衣/欲しがり 後編 波多野結衣.wmv
/Volumes/Adult/Files/波多野结衣/欲しがり 前編 波多野結衣.wmv
/Volumes/Adult/Files/波多野结衣/加勒比 062212-055 夫の目の前で妻が ~元上司に縛られて~波多野結衣~.rmvb
/Volumes/Adult/Files/波多野结衣/022213-271-carib-whole_s.mp4
/Volumes/Adult/Files/桜木凛 Rin Sakuragi FHD Collection Pack Vol/BBI-183.wmv
/Volumes/Adult/Files/NOP-019 芭蕾教室 水嶋あずみ/NOP019B.HD.wmv
/Volumes/Adult/Files/一ノ瀬アメリ part2/栗栖エリカ/20081105栗栖エリカ - Sky Angel Blue 10 天舞超絕美少女天使降臨(skyhd010)(中文字幕).avi
/Volumes/Adult/Files/一ノ瀬アメリ part2/Max Girls/Max Girls 24(xv804)伊東遥,Rio,小沢アリス,葉月しおり,一ノ瀬アメリ,ひなた結衣,藤崎りお.avi
/Volumes/Adult/Files/一ノ瀬アメリ part2/瀬アメリAmeri Ichinose/20091127一瀬アメリ - 一見面就做愛(xv801).avi
/Volumes/Adult/Files/Aki Sasaki Megapack/MSTG-003.mkv
/Volumes/Adult/Files/SKYHD-001~010/SKYHD-009_H265.mkv
/Volumes/Adult/Files/大桥步兵合集/LAFBD-41.LaForet.Girl.41.angel.and.devil.Miku.Ohashi.2015.Bluray.1080p.x264.ac3-MTeam.mkv
/Volumes/Adult/Files/大桥步兵合集/032015_161-caribpr-high.mp4
/Volumes/Adult/Files/桃谷绘里香(桃谷エリカ) 所有作品集合/(PRESTIGE)(ABP-171)彼女のお姉さんは、誘惑ヤリたがり娘。桃谷エリカ.wmv
/Volumes/Adult/Files/桃谷绘里香(桃谷エリカ) 所有作品集合/(PRESTIGE)(ABP-145)濃密な接吻と欲情ベロキス性交 04 桃谷エリカ.wmv
/Volumes/Adult/Files/桃谷绘里香(桃谷エリカ) 所有作品集合/118ppt00016hhb2.mkv
/Volumes/Adult/Files/tia/soe935C.HD.wmv
/Volumes/Adult/Files/SKYHD-011~020/SKYHD-020_H265.mkv
/Volumes/Adult/Files/sakumomo1203-PML/IDBD-795 ももに夢中 2018年日本人にもっとも愛された女優桜空ももPREMIUM BOX8時間BEST.mp4
/Volumes/Adult/Files/sakumomo1203-PML/IDBD-768 Gカップグラビアアイドル桜空もも初ベスト 原石 2【桃花族】.mp4
/Volumes/Adult/Files/RION(りおん).Utsunomiya.Shion.宇都宮しをん(うつのみやしをん)/VR/sivr00008_E.mp4
/Volumes/Adult/Files/RION(りおん).Utsunomiya.Shion.宇都宮しをん(うつのみやしをん)/DMM.Video/onsd00899hhb3.mp4
/Volumes/Adult/Files/Rating Top 30 JAV pack/SHKD-744 営業課長の湿ったパンスト 里美ゆりあ.mp4
/Volumes/Adult/Files/Rating Top 30 JAV pack/ABP-627 裏・鈴村あいり-鈴村あいりのオトナの激情SEX4本番 鈴村あいり.MP4
/Volumes/Adult/Files/Rating Top 30 JAV pack/20 ABP-408 上原瑞穂/上原瑞穂 ABP-408 无码流出片段/[ThZu.Cc]20150909164411.m2ts
/Volumes/Adult/Files/Caribbean-101717-520-HD/100917-515/100917-515-carib-1080p.mp4
/Volumes/Adult/Files/Kirara Asuka (@明日花キララ) FHD Pack Vol#3(190119)@RUNBKK/No-Watermarked/SOE976.FHD3.wmv
/Volumes/Adult/Files/(1.18TB) Julia movie pack collection Part 1/720p/RBD-406_1.mp4
/Volumes/Adult/Files/(1.18TB) Julia movie pack collection Part 1/720p/MDYD-664B.mp4
/Volumes/Adult/Files/ノ瀬アメリ/20081105栗栖エリカ - Sky Angel Blue 10 天舞超絕美少女天使降臨(skyhd010)(中文字幕).avi
/Volumes/Adult/Files/ノ瀬アメリ/一ノ瀬アメリ~加勒比 VERY SEXY.wmv
/Volumes/Adult/Files/ノ瀬アメリ/20101202一瀬アメリ - 東京ブルドック05(inu006).avi
/Volumes/Adult/Files/ノ瀬アメリ/Sky Angel Vol 80 - CD2.mp4
/Volumes/Adult/Files/ノ瀬アメリ/20100226一瀬アメリ - OL Style 制服(xv827).avi
/Volumes/Adult/Files/Mika Sumire すみれ美香/Caribbean-091818-755.mp4
/Volumes/Adult/Files/[Tokyo-Hot] [n1180] 美人秘書3穴串刺奉仕残業 (中井綾香 Ayaka Nakai)/(Tokyo-Hot)(n1180)美人秘書3穴串刺奉仕残業 中井綾香.mp4
/Volumes/Adult/Files/Takizawa Rola/[HD]abp-031C.wmv
/Volumes/Adult/Files/Takizawa Rola/ABP-013HDA.wmv
/Volumes/Adult/Files/Uncensored Mosaic Removal Megapack/ADN-017(Asami Ogawa).mp4
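
The three TestPath*.txt fixtures above feed the number-extraction logic with messy real-world filenames: dashed and zero-padded IDs (mcdv-47, mcdv047), date-style uncensored IDs (070409_621), and part or episode suffixes (IPTD-999-B, soe935C), the case the lookbehind regex in test.py later in this diff probes. A minimal sketch of the kind of regex pass that covers the common cases; the patterns and the function name guess_number are illustrative, not the project's actual rules:

import os
import re

def guess_number(path):
    """Best-effort movie-number guess from a file path (illustrative only)."""
    name = os.path.splitext(os.path.basename(path))[0]
    # Date-style IDs such as 070409_621 or 022213-271 used by uncensored studios.
    m = re.search(r'\d{6}[-_]\d{3}', name)
    if m:
        return m.group()
    # Letter prefix plus digits, dash optional: MCDV-47, mcdv047, SKYHD-009.
    m = re.search(r'([A-Za-z]{2,6})-?(\d{2,4})', name)
    if m:
        return '%s-%s' % (m.group(1).upper(), m.group(2))
    return ''

for p in ['/mcdv-047.mp4', '/1pondo-070409_621.mp4',
          '/Volumes/Adult/Files/Takizawa Rola/ABP-013HDA.wmv']:
    print(guess_number(p))  # MCDV-047, 070409_621, ABP-013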

30
config.ini Normal file → Executable file

@@ -1,27 +1,35 @@

Before:
[common]
main_mode=1
failed_output_folder=failed
success_output_folder=JAV_output
soft_link=0
[proxy]
proxy=
timeout=10
retry=3
[Name_Rule]
location_rule=actor+'/'+number
naming_rule=number+'-'+title
[update]
update_check=1
[media]
media_warehouse=emby
#emby or plex or kodi ,emby=jellyfin
[escape]
literals=\()
[movie_location]
path=

After:
[common]
main_mode=2
# All paths must be absolute; do not include characters such as " or '
search_folder= /Volumes/192.168.2.100/Adult/AVTest
# If failed_output_folder is empty, videos whose metadata cannot be scraped will not be moved
failed_output_folder= /Volumes/192.168.2.100/Adult/UnknownStars
success_output_folder= /Volumes/192.168.2.100/Adult/Files
# Storage path for temporary assets such as xxx.nfo and poster images
temp_folder= /Volumes/192.168.2.100/Adult/temp
# For remotely mounted volumes it is best not to enable soft links: a soft link stores an absolute path, and the path on the remote NAS usually differs from the local mount path.
soft_link=0
[proxy]
# Example SOCKS proxy configuration; the value after = may be left empty
proxy= socks5h://127.0.0.1:1081
timeout= 10
retry= 5
[Name_Rule]
location_rule= actor+'/'+number
naming_rule= number+'-'+title
[update]
update_check=1
[media]
#emby or plex or kodi ,emby=jellyfin
media_warehouse=EMBY
[escape]
literals=\()
folders=/Volumes/Adult/UnknownStars,/Volumes/Adult/Stars
[debug_mode]
switch=1
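
A minimal sketch of how this config might be consumed, assuming Python's standard configparser; the reader below is illustrative, as the project's actual loading code is not shown in this diff:

import configparser

config = configparser.ConfigParser()
config.read('config.ini', encoding='utf-8')

# Values come back as strings; strip() is defensive against the "key= value" spacing used above.
main_mode = int(config['common']['main_mode'])
search_folder = config['common']['search_folder'].strip()
proxy = config['proxy']['proxy'].strip()   # an empty string would disable the proxy
retry_count = int(config['proxy']['retry'])
debug = config['debug_mode']['switch'] == '1'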

1583
core.py

File diff suppressed because it is too large


@@ -1,84 +0,0 @@
import re
from lxml import etree  # needs install
import json
import ADC_function
def getTitle(htmlcode):  # get the title
    #print(htmlcode)
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    result = str(html.xpath('/html/body/div[2]/div/div[1]/h3/text()')).strip(" ['']")
    result2 = str(re.sub('\D{2}2-\d+', '', result)).replace(' ', '', 1)
    #print(result2)
    return result2
def getActor(htmlcode):
    try:
        html = etree.fromstring(htmlcode, etree.HTMLParser())
        result = str(html.xpath('/html/body/div[2]/div/div[1]/h5[5]/a/text()')).strip(" ['']")
        return result
    except:
        return ''
def getStudio(htmlcode):  # get the studio
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    result = str(html.xpath('/html/body/div[2]/div/div[1]/h5[3]/a[1]/text()')).strip(" ['']")
    return result
def getNum(htmlcode):  # get the movie number
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[1]/span[2]/text()')).strip(" ['']")
    #print(result)
    return result
def getRelease(htmlcode2):
    #a=ADC_function.get_html('http://adult.contents.fc2.com/article_search.php?id='+str(number).lstrip("FC2-").lstrip("fc2-").lstrip("fc2_").lstrip("fc2-")+'&utm_source=aff_php&utm_medium=source_code&utm_campaign=from_aff_php')
    html = etree.fromstring(htmlcode2, etree.HTMLParser())
    result = str(html.xpath('//*[@id="container"]/div[1]/div/article/section[1]/div/div[2]/dl/dd[4]/text()')).strip(" ['']")
    return result
def getCover(htmlcode, number, htmlcode2):  # get the cover image
    #a = ADC_function.get_html('http://adult.contents.fc2.com/article_search.php?id=' + str(number).lstrip("FC2-").lstrip("fc2-").lstrip("fc2_").lstrip("fc2-") + '&utm_source=aff_php&utm_medium=source_code&utm_campaign=from_aff_php')
    html = etree.fromstring(htmlcode2, etree.HTMLParser())
    result = str(html.xpath('//*[@id="container"]/div[1]/div/article/section[1]/div/div[1]/a/img/@src')).strip(" ['']")
    if result == '':
        html = etree.fromstring(htmlcode, etree.HTMLParser())
        result2 = str(html.xpath('//*[@id="slider"]/ul[1]/li[1]/img/@src')).strip(" ['']")
        return 'https://fc2club.com' + result2
    return 'http:' + result
def getOutline(htmlcode2):  # get the outline
    html = etree.fromstring(htmlcode2, etree.HTMLParser())
    result = str(html.xpath('/html/body/div[1]/div[2]/div[2]/div[1]/div/article/section[4]/p/text()')).strip(" ['']").replace("\\n", '', 10000).replace("'", '', 10000).replace(', ,', '').strip(' ').replace('。,', ',')
    return result
def getTag(htmlcode):  # get the tags
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    result = str(html.xpath('/html/body/div[2]/div/div[1]/h5[4]/a/text()'))
    return result.strip(" ['']").replace("'", '').replace(' ', '')
def getYear(release):
    try:
        result = re.search('\d{4}', release).group()
        return result
    except:
        return ''
def main(number):
    htmlcode2 = ADC_function.get_html('http://adult.contents.fc2.com/article_search.php?id=' + number + '&utm_source=aff_php&utm_medium=source_code&utm_campaign=from_aff_php')
    htmlcode = ADC_function.get_html('https://fc2club.com//html/FC2-' + number + '.html')
    actor = getActor(htmlcode)
    if getActor(htmlcode) == '':
        actor = 'FC2系列'
    dic = {
        'title': getTitle(htmlcode),
        'studio': getStudio(htmlcode),
        'year': '',  # str(re.search('\d{4}',getRelease(number)).group()),
        'outline': getOutline(htmlcode2),
        'runtime': getYear(getRelease(htmlcode)),
        'director': getStudio(htmlcode),
        'actor': actor,
        'release': getRelease(number),
        'number': 'FC2-' + number,
        'cover': getCover(htmlcode, number, htmlcode2),
        'imagecut': 0,
        'tag': getTag(htmlcode),
        'actor_photo': '',
        'website': 'https://fc2club.com//html/FC2-' + number + '.html',
        'source': 'https://fc2club.com//html/FC2-' + number + '.html',
    }
    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'))  # .encode('UTF-8')
    return js
#print(main('1051725'))

BIN
readme/._readme1.PNG Executable file

Binary file not shown.

BIN
readme/._readme2.PNG Executable file

Binary file not shown.

BIN
readme/._readme4.PNG Executable file

Binary file not shown.

0
readme/This is readms.md's images folder Normal file → Executable file

0
readme/flow_chart2.png Normal file → Executable file

Before

Width:  |  Height:  |  Size: 101 KiB

After

Width:  |  Height:  |  Size: 101 KiB

0
readme/readme1.PNG Normal file → Executable file

Before

Width:  |  Height:  |  Size: 1.1 KiB

After

Width:  |  Height:  |  Size: 1.1 KiB

0
readme/readme2.PNG Normal file → Executable file

Before

Width:  |  Height:  |  Size: 3.4 KiB

After

Width:  |  Height:  |  Size: 3.4 KiB

0
readme/readme3.PNG Normal file → Executable file

Before

Width:  |  Height:  |  Size: 1.3 KiB

After

Width:  |  Height:  |  Size: 1.3 KiB

0
readme/readme4.PNG Normal file → Executable file

Before

Width:  |  Height:  |  Size: 16 KiB

After

Width:  |  Height:  |  Size: 16 KiB

0
readme/single.gif Normal file → Executable file

Before

Width:  |  Height:  |  Size: 68 KiB

After

Width:  |  Height:  |  Size: 68 KiB


@@ -0,0 +1 @@
1

BIN
resource/flow_chart2.png Executable file

Binary file not shown.

After

Width:  |  Height:  |  Size: 101 KiB

BIN
resource/readme1.PNG Executable file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 KiB

BIN
resource/readme2.PNG Executable file

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.4 KiB

BIN
resource/readme3.PNG Executable file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.3 KiB

BIN
resource/readme4.PNG Executable file

Binary file not shown.

After

Width:  |  Height:  |  Size: 16 KiB

1
resource/ruquirments.txt Executable file

@@ -0,0 +1 @@
pipenv install -rlxml bs4 pillow pyquery
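
As written, the single line above conflates a pipenv command with the package list itself. Assuming the intent was simply to record the four dependencies, the equivalent usage would be (shown for illustration):

# install the packages directly into the pipenv environment
pipenv install lxml bs4 pillow pyquery

or a plain requirements file listing lxml, bs4, pillow and pyquery one per line, installed with pip install -r ruquirments.txt.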

BIN
resource/single.gif Executable file

Binary file not shown.

After

Width:  |  Height:  |  Size: 68 KiB

80
test.py Executable file

@@ -0,0 +1,80 @@
import os
import re
from itertools import groupby

import fuckit
import pandas as pd
from tenacity import retry, stop_after_delay, wait_fixed


def go():
    a = [1, 2, 3, 4, 5, 6]
    # [print(x) for x in a]
    a1 = groupby(a, key=lambda k: (k / 2))
    for i in a1:
        print(i)
    for i in a1:
        print(i)


class TryDo:
    def __init__(self, func, times=3):
        self.tries = times
        self.func = func

    def __iter__(self):
        self.currentTry = 1
        return self

    def __next__(self):
        if self.currentTry > self.tries:
            raise StopIteration(False)
        else:
            self.currentTry += 1
            self.func()
            raise StopIteration(True)

    # def do(self):


@retry(stop=stop_after_delay(3), wait=wait_fixed(2))
def stop_after_10_s():
    print("Stopping after 10 seconds")
    raise Exception


# f = iter(TryDo(do_something, 5))
# stop_after_10_s()


def errorfunc():
    raise Exception


def okfunc():
    print("ok")


# with fuckit:
#     errorfunc()
#     okfunc()

# re.match()
r = re.search(r'(?<=999)-?((?P<alpha>([A-Z](?![A-Z])))|(?P<num>\d(?!\d)))', "IPTD-999-B-彼女の姉貴とイケナイ関係-RIO", re.I)
print(r.groupdict())
print(r.groupdict()['alpha'])
print(r.group(2))

line = "Cats are smarter than dogs"
matchObj = re.search(r'(?<=a)(.*) are (.*?) .*', line, re.M | re.I)
if matchObj:
    print("matchObj.group() : ", matchObj.group())
    print("matchObj.group(1) : ", matchObj.group(1))
    print("matchObj.group(2) : ", matchObj.group(2))
else:
    print("No match!!")

# print(r[-1])
# print(newList)
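
The go() scratch function above trips over two itertools.groupby behaviours worth spelling out. A self-contained sketch, with an illustrative list and key function:

from itertools import groupby

data = [1, 2, 3, 4, 5, 6]
grouped = groupby(data, key=lambda k: k // 2)  # floor division so neighbours share a key

# groupby only merges *consecutive* items with equal keys, and each group
# is a lazy iterator tied to the parent.
for key, group in grouped:
    print(key, list(group))  # 0 [1], 1 [2, 3], 2 [4, 5], 3 [6]

# The parent iterator is now exhausted; a second pass yields nothing,
# which is why the second loop in go() prints no output.
for key, group in grouped:
    print("never reached")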

6
update_check.json Normal file → Executable file

@@ -1,5 +1,5 @@

Before:
{
    "version": "2.1",
    "version_show":"2.1",
    "download": "https://github.com/yoshiko2/AV_Data_Capture/releases"
}

After:
{
    "version": "2.8.2",
    "version_show":"2.8.2",
    "download": "https://github.com/yoshiko2/AV_Data_Capture/releases"
}
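
A minimal sketch of how a client could consume this file when update_check=1 is set in config.ini; the raw URL below is hypothetical and the tuple comparison is illustrative, not the project's actual check:

import json
from urllib.request import urlopen

LOCAL_VERSION = "2.8.2"
# Hypothetical raw URL; the real client may fetch this file differently.
URL = "https://raw.githubusercontent.com/yoshiko2/AV_Data_Capture/master/update_check.json"

def as_tuple(version):
    # Compare numerically so that "2.10" sorts after "2.8.2".
    return tuple(int(part) for part in version.split('.'))

with urlopen(URL, timeout=10) as resp:
    info = json.load(resp)

if as_tuple(info["version"]) > as_tuple(LOCAL_VERSION):
    print("New version %s available: %s" % (info["version_show"], info["download"]))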