Update 2.3

2020-01-27 01:28:59 +08:00 · 2020-01-27 01:28:59 +08:00 · fe72afc1cf
commit fe72afc1cf
parent 70f1b16b3c
5 changed files with 190 additions and 42 deletions
--- a/AV_Data_Capture.py
+++ b/AV_Data_Capture.py
@ -14,7 +14,7 @@ os.chdir(os.getcwd())

 # ============global var===========

-version='2.2'
+version='2.3'

 config = ConfigParser()
 config.read(config_file, encoding='UTF-8')
@ -23,20 +23,6 @@ Platform = sys.platform

 # ==========global var end=========

-def moveMovies():
-    movieFiles = []
-    fromPath = config['movie_location']['path']
-    if Platform == 'win32':
-        movieFormat = ["avi", "rmvb", "wmv", "mov", "mp4", "mkv", "flv", "ts"]
-    else:
-        movieFormat = ["AVI", "RMVB", "WMV", "MOV", "MP4", "MKV", "FLV", "TS","avi", "rmvb", "wmv", "mov", "mp4", "mkv", "flv", "ts"]
-    for fm in movieFormat:
-        movieFiles = movieFiles + [os.path.join(dirpath, f)
-            for dirpath, dirnames, files in os.walk(fromPath)
-            for f in fnmatch.filter(files, '*.' + fm)]
-    for movie in movieFiles:
-        print("Move file " + movie)
-        shutil.move(movie, os.path.curdir)
 def UpdateCheck():
    if UpdateCheckSwitch() == '1':
        html2 = get_html('https://raw.githubusercontent.com/yoshiko2/AV_Data_Capture/master/update_check.json')
@ -53,11 +39,17 @@ def movie_lists():
    global exclude_directory_1
    global exclude_directory_2
    total=[]
-    file_type = ['mp4','avi','rmvb','wmv','mov','mkv','flv','ts']
+    file_type = ['.mp4','.avi','.rmvb','.wmv','.mov','.mkv','.flv','.ts','.MP4', '.AVI', '.RMVB', '.WMV', '.MOV', '.MKV', '.FLV', '.TS',]
    exclude_directory_1 = config['common']['failed_output_folder']
    exclude_directory_2 = config['common']['success_output_folder']
-    for a in file_type:
-        total += glob.glob(r"./*." + a)
+    file_root=os.getcwd()
+    for root,dirs,files in os.walk(file_root):
+        if exclude_directory_1 not in root and exclude_directory_2 not in root:
+            for f in files:
+                if os.path.splitext(f)[1] in file_type:
+                    path = os.path.join(root,f)
+                    path = path.replace(file_root,'.')
+                    total.append(path)
    return total
 def CreatFailedFolder():
    if not os.path.exists('failed/'):  # 新建failed文件夹
@ -86,7 +78,7 @@ def rreplace(self, old, new, *max):
    return new.join(self.rsplit(old, count))
 def getNumber(filepath):
    filepath = filepath.replace('.\\','')
-    try:  # 普通提取番号 主要处理包含减号-的番号
+    if '-' in filepath or '_' in filepath:  # 普通提取番号 主要处理包含减号-和_的番号
        filepath = filepath.replace("_", "-")
        filepath.strip('22-sht.me').strip('-HD').strip('-hd')
        filename = str(re.sub("\[\d{4}-\d{1,2}-\d{1,2}\] - ", "", filepath))  # 去除文件名中时间
@ -97,18 +89,11 @@ def getNumber(filepath):
        except:  # 提取类似mkbd-s120番号
            file_number = re.search('\w+-\w+\d+', filename).group()
        return file_number
-    except:  # 提取不含减号-的番号
+    else:  # 提取不含减号-的番号，FANZA CID
        try:
-            filename = str(re.sub("ts6\d", "", filepath)).strip('Tokyo-hot').strip('tokyo-hot')
-            filename = str(re.sub(".*?\.com-\d+", "", filename)).replace('_', '')
-            file_number = str(re.search('\w+\d{4}', filename).group(0))
-            return file_number
-        except:  # 提取无减号番号
-            filename = str(re.sub("ts6\d", "", filepath))  # 去除ts64/265
-            filename = str(re.sub(".*?\.com-\d+", "", filename))
-            file_number = str(re.match('\w+', filename).group())
-            file_number = str(file_number.replace(re.match("^[A-Za-z]+", file_number).group(),re.match("^[A-Za-z]+", file_number).group() + '-'))
-            return file_number
+            return str(re.findall(r'(.+?)\.', str(re.search('([^<>/\\\\|:""\\*\\?]+)\\.\\w+$', filepath).group()))).strip("['']").replace('_', '-')
+        except:
+            return re.search(r'(.+?)\.',filepath)[0]

 def RunCore():
    if Platform == 'win32':
@ -120,7 +105,10 @@ def RunCore():
            os.system('python core.py' + '   "' + i + '" --number "' + getNumber(i) + '"')  # 从py文件启动（用于源码py）
    else:
        if os.path.exists('core.py'):
-            os.system('python3 core.py' + '   "' + i + '" --number "' + getNumber(i) + '"')  # 从py文件启动（用于源码py）
+            try:
+                os.system('python3 core.py' + '   "' + i + '" --number "' + getNumber(i) + '"')  # 从py文件启动（用于源码py）
+            except:
+                os.system('python core.py' + '   "' + i + '" --number "' + getNumber(i) + '"')  # 从py文件启动（用于源码py）
        elif os.path.exists('core.exe'):
            os.system('core.exe' + '   "' + i + '" --number "' + getNumber(i) + '"')  # 从exe启动（用于EXE版程序）
        elif os.path.exists('core.py') and os.path.exists('core.exe'):
@ -130,17 +118,18 @@ if __name__ =='__main__':
    print('[*]================== AV Data Capture ===================')
    print('[*]                     Version '+version)
    print('[*]======================================================')
+
    CreatFailedFolder()
    UpdateCheck()
-    moveMovies()
    os.chdir(os.getcwd())
+    movie_list=movie_lists()

    count = 0
-    count_all = str(len(movie_lists()))
-    print('[+]Find',str(len(movie_lists())),'movies')
+    count_all = str(len(movie_list))
+    print('[+]Find',count_all,'movies')
    if config['common']['soft_link'] == '1':
        print('[!] --- Soft link mode is ENABLE! ----')
-    for i in movie_lists(): #遍历电影列表 交给core处理
+    for i in movie_list: #遍历电影列表 交给core处理
        count = count + 1
        percentage = str(count/int(count_all)*100)[:4]+'%'
        print('[!] - '+percentage+' ['+str(count)+'/'+count_all+'] -')
--- a/config.ini
+++ b/config.ini
@ -23,5 +23,5 @@ media_warehouse=emby
 [escape]
 literals=\()

-[movie_location]
-path=
+[debug_mode]
+switch=0
--- a/core.py
+++ b/core.py
@ -16,6 +16,7 @@ import siro
 import avsox
 import javbus
 import javdb
+import fanza
 # =========website========


@ -135,6 +136,14 @@ def getDataFromJSON(file_number):  # 从JSON返回元数据
    # ==
    elif 'siro' in file_number or 'SIRO' in file_number or 'Siro' in file_number:
        json_data = json.loads(siro.main(file_number))
+    elif not '-' in file_number or '_' in file_number:
+        json_data = json.loads(fanza.main(file_number))
+        if getDataState(json_data) == 0:  # 如果元数据获取失败，请求番号至其他网站抓取
+            json_data = json.loads(javbus.main(file_number))
+        if getDataState(json_data) == 0:  # 如果元数据获取失败，请求番号至其他网站抓取
+            json_data = json.loads(avsox.main(file_number))
+        if getDataState(json_data) == 0:  # 如果元数据获取失败，请求番号至其他网站抓取
+            json_data = json.loads(javdb.main(file_number))
    # ==
    else:
        json_data = json.loads(javbus.main(file_number))
@ -145,7 +154,7 @@ def getDataFromJSON(file_number):  # 从JSON返回元数据

    # ================================================网站规则添加结束================================================

-    title = str(json_data['title']).replace(' ', '')
+    title = json_data['title']
    studio = json_data['studio']
    year = json_data['year']
    outline = json_data['outline']
@ -305,6 +314,18 @@ def imageDownload():  # 封面是否下载成功，否则移动到failed
        if DownloadFileWithFilename(cover, number + c_word + '.jpg', path) == 'failed':
            moveFailedFolder()
        DownloadFileWithFilename(cover, number + c_word + '.jpg', path)
+        if not os.path.getsize(path + '/' + number + c_word + '.jpg') == 0:
+            print('[+]Image Downloaded!', path + '/' + number + c_word + '.jpg')
+            return
+        i = 1
+        while i <= int(config['proxy']['retry']):
+            if os.path.getsize(path + '/' + number + c_word + '.jpg') == 0:
+                print('[!]Image Download Failed! Trying again. [' + config['proxy']['retry'] + '/3]')
+                DownloadFileWithFilename(cover, number + c_word + '.jpg', path)
+                i = i + 1
+                continue
+            else:
+                break
        if multi_part == 1:
            old_name = os.path.join(path, number + c_word + '.jpg')
            new_name = os.path.join(path, number + c_word + '.jpg')
@ -316,11 +337,38 @@ def imageDownload():  # 封面是否下载成功，否则移动到failed
        if DownloadFileWithFilename(cover, 'fanart.jpg', path) == 'failed':
            moveFailedFolder()
        DownloadFileWithFilename(cover, 'fanart.jpg', path)
+        if not os.path.getsize(path + '/fanart.jpg') == 0:
+            print('[+]Image Downloaded!', path + '/fanart.jpg')
+            return
+        i = 1
+        while i <= int(config['proxy']['retry']):
+            if os.path.getsize(path + '/fanart.jpg') == 0:
+                print('[!]Image Download Failed! Trying again. [' + config['proxy']['retry'] + '/3]')
+                DownloadFileWithFilename(cover, 'fanart.jpg', path)
+                i = i + 1
+                continue
+            else:
+                break
+        if not os.path.getsize(path + '/' + number + c_word + '.jpg') == 0:
+            print('[!]Image Download Failed! Trying again.')
+            DownloadFileWithFilename(cover, number + c_word + '.jpg', path)
        print('[+]Image Downloaded!', path + '/fanart.jpg')
    elif option == 'kodi':
        if DownloadFileWithFilename(cover, number + c_word + '-fanart.jpg', path) == 'failed':
            moveFailedFolder()
        DownloadFileWithFilename(cover, number + c_word + '-fanart.jpg', path)
+        if not os.path.getsize(path + '/' + number + c_word + '-fanart.jpg') == 0:
+            print('[+]Image Downloaded!', path + '/' + number + c_word + '-fanart.jpg')
+            return
+        i = 1
+        while i <= int(config['proxy']['retry']):
+            if os.path.getsize(path + '/' + number + c_word + '-fanart.jpg') == 0:
+                print('[!]Image Download Failed! Trying again. [' + config['proxy']['retry'] + '/3]')
+                DownloadFileWithFilename(cover, number + c_word + '-fanart.jpg', path)
+                i = i + 1
+                continue
+            else:
+                break
        print('[+]Image Downloaded!', path + '/' + number + c_word + '-fanart.jpg')


@ -330,6 +378,7 @@ def PrintFiles():
            os.makedirs(path)
        if option == 'plex':
            with open(path + "/" + number + c_word + ".nfo", "wt", encoding='UTF-8') as code:
+                print('<?xml version="1.0" encoding="UTF-8" ?>', file=code)
                print("<movie>", file=code)
                print(" <title>" + naming_rule + part + "</title>", file=code)
                print("  <set>", file=code)
@ -377,6 +426,7 @@ def PrintFiles():
                print("[+]Writeed!          " + path + "/" + number + ".nfo")
        elif option == 'emby':
            with open(path + "/" + number + c_word + ".nfo", "wt", encoding='UTF-8') as code:
+                print('<?xml version="1.0" encoding="UTF-8" ?>', file=code)
                print("<movie>", file=code)
                print(" <title>" + naming_rule + part + "</title>", file=code)
                print("  <set>", file=code)
@ -424,6 +474,7 @@ def PrintFiles():
                print("[+]Writeed!          " + path + "/" + number + c_word + ".nfo")
        elif option == 'kodi':
            with open(path + "/" + number + c_word + ".nfo", "wt", encoding='UTF-8') as code:
+                print('<?xml version="1.0" encoding="UTF-8" ?>', file=code)
                print("<movie>", file=code)
                print(" <title>" + naming_rule + part + "</title>", file=code)
                print("  <set>", file=code)
@ -618,11 +669,11 @@ def debug_mode():
            print('[+] ---Debug info---')
            for i, v in json_data.items():
                if i == 'outline':
-                    print('[+] -', i, ':', len(v), 'characters')
+                    print('[+]  -', i, '    :', len(v), 'characters')
                    continue
                if i == 'actor_photo' or i == 'year':
                    continue
-                print('[+] -', i+str(9-len(i)*'-'), ':', v)
+                print('[+]  -',"%-11s" % i, ':', v)
            print('[+] ---Debug info---')
    except:
        aaa = ''
--- a/fanza.py
+++ b/fanza.py
@ -0,0 +1,108 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+import re
+from lxml import etree
+import json
+from ADC_function import *
+# import sys
+# import io
+# sys.stdout = io.TextIOWrapper(sys.stdout.buffer, errors = 'replace', line_buffering = True)
+
+def getTitle(a):
+    html = etree.fromstring(a, etree.HTMLParser())
+    result = html.xpath('//*[@id="title"]/text()')[0]
+    return result
+def getActor(a): #//*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text()
+    html = etree.fromstring(a, etree.HTMLParser())
+    result = str(html.xpath("//td[contains(text(),'出演者')]/following-sibling::td/span/a/text()")).strip(" ['']").replace("', '",',')
+    return result
+def getStudio(a):
+    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
+    try:
+        result1 = html.xpath("//td[contains(text(),'メーカー')]/following-sibling::td/a/text()")[0]
+    except:
+        result1 = html.xpath("//td[contains(text(),'メーカー')]/following-sibling::td/text()")[0]
+    return result1
+def getRuntime(a):
+    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
+    result1 = html.xpath("//td[contains(text(),'収録時間')]/following-sibling::td/text()")[0]
+    return re.search('\d+', str(result1)).group()
+def getLabel(a):
+    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
+    try:
+        result1 = html.xpath("//td[contains(text(),'シリーズ：')]/following-sibling::td/a/text()")[0]
+    except:
+        result1 = html.xpath("//td[contains(text(),'シリーズ：')]/following-sibling::td/text()")[0]
+    return result1
+def getNum(a):
+    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
+    try:
+        result1 = html.xpath("//td[contains(text(),'品番：')]/following-sibling::td/a/text()")[0]
+    except:
+        result1 = html.xpath("//td[contains(text(),'品番：')]/following-sibling::td/text()")[0]
+    return result1
+def getYear(getRelease):
+    try:
+        result = str(re.search('\d{4}',getRelease).group())
+        return result
+    except:
+        return getRelease
+def getRelease(a):
+    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
+    try:
+        result1 = html.xpath("//td[contains(text(),'商品発売日：')]/following-sibling::td/a/text()")[0].lstrip('\n')
+    except:
+        result1 = html.xpath("//td[contains(text(),'商品発売日：')]/following-sibling::td/text()")[0].lstrip('\n')
+    return result1
+def getTag(a):
+    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
+    try:
+        result1 = str(html.xpath("//td[contains(text(),'ジャンル：')]/following-sibling::td/a/text()")).strip(" ['']")
+    except:
+        result1 = str(html.xpath("//td[contains(text(),'ジャンル：')]/following-sibling::td/text()")).strip(" ['']")
+    return result1.replace("', '",",")
+def getCover(htmlcode,number):
+    html = etree.fromstring(htmlcode, etree.HTMLParser())
+    result = html.xpath('//*[@id="'+number+'"]/@href')[0]
+    return result
+def getDirector(a):
+    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
+    try:
+        result1 = html.xpath("//td[contains(text(),'監督：')]/following-sibling::td/a/text()")[0]
+    except:
+        result1 = html.xpath("//td[contains(text(),'監督：')]/following-sibling::td/text()")[0]
+    return result1
+def getOutline(htmlcode):
+    html = etree.fromstring(htmlcode, etree.HTMLParser())
+    result = str(html.xpath("//div[@class='mg-b20 lh4']/text()")[0]).replace('\n','')
+    return result
+def main(number):
+    htmlcode=get_html('https://www.dmm.co.jp/digital/videoa/-/detail/=/cid='+number)
+    url = 'https://www.dmm.co.jp/digital/videoa/-/detail/=/cid='+number
+    if '404 Not Found' in htmlcode:
+        htmlcode=get_html('https://www.dmm.co.jp/mono/dvd/-/detail/=/cid='+number)
+        url = 'https://www.dmm.co.jp/mono/dvd/-/detail/=/cid='+number
+    dic = {
+        'title': getTitle(htmlcode).strip(getActor(htmlcode)),
+        'studio': getStudio(htmlcode),
+        'outline': getOutline(htmlcode),
+        'runtime': getRuntime(htmlcode),
+        'director': getDirector(htmlcode),
+        'actor': getActor(htmlcode),
+        'release': getRelease(htmlcode),
+        'number': getNum(htmlcode),
+        'cover': getCover(htmlcode,number),
+        'imagecut': 1,
+        'tag': getTag(htmlcode),
+        'label':getLabel(htmlcode),
+        'year': getYear(getRelease(htmlcode)),  # str(re.search('\d{4}',getRelease(a)).group()),
+        'actor_photo': '',
+        'website': url,
+        'source': 'siro.py',
+    }
+    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'))  # .encode('UTF-8')
+    return js
+
+# main('DV-1562')
+# input("[+][+]Press enter key exit, you can check the error messge before you exit.\n[+][+]按回车键结束，你可以在结束之前查看和错误信息。")
+#print(main('ssni00384'))
--- a/update_check.json
+++ b/update_check.json
@ -1,5 +1,5 @@
 {
-	"version": "2.2",
-	"version_show":"2.2",
+	"version": "2.3",
+	"version_show":"2.3",
 	"download": "https://github.com/yoshiko2/AV_Data_Capture/releases"
 }