Beta 11.4 Update

2019-07-04 02:25:40 +08:00 · 2019-07-04 02:25:40 +08:00 · 4b35113932
commit 4b35113932
parent d672d4d0d7
8 changed files with 115 additions and 55 deletions
--- a/ADC_function.py
+++ b/ADC_function.py
@ -5,17 +5,17 @@ import re
 from retrying import retry
 import sys

-# content = open('proxy.ini').read()
+# content = open('config.ini').read()
 # content = re.sub(r"\xfe\xff","", content)
 # content = re.sub(r"\xff\xfe","", content)
 # content = re.sub(r"\xef\xbb\xbf","", content)
 # open('BaseConfig.cfg', 'w').write(content)

 config = RawConfigParser()
-if os.path.exists('proxy.ini'):
-    config.read('proxy.ini', encoding='UTF-8')
+if os.path.exists('config.ini'):
+    config.read('config.ini', encoding='UTF-8')
 else:
-    with open("proxy.ini", "wt", encoding='UTF-8') as code:
+    with open("config.ini", "wt", encoding='UTF-8') as code:
        print("[proxy]",file=code)
        print("proxy=127.0.0.1:1080",file=code)
        print("timeout=10", file=code)
--- a/AV_Data_Capture.py
+++ b/AV_Data_Capture.py
@ -7,7 +7,7 @@ import ADC_function
 import json
 import shutil

-version='0.11.2'
+version='0.11.4'
 os.chdir(os.getcwd())

 def UpdateCheck():
@ -102,12 +102,20 @@ def getNumber(filepath):
        print('[-]' + str(os.path.basename(filepath)) + ' Cannot catch the number :')
        print('[-]' + str(os.path.basename(filepath)) + ' :', e)
        print('[-]Move ' + os.path.basename(filepath) + ' to failed folder')
+
        shutil.move(filepath, str(os.getcwd()) + '/' + 'failed/')
    except IOError as e2:
        print('[-]' + str(os.path.basename(filepath)) + ' Cannot catch the number :')
        print('[-]' + str(os.path.basename(filepath)) + ' :', e2)
        print('[-]Move ' + os.path.basename(filepath) + ' to failed folder')
        shutil.move(filepath, str(os.getcwd()) + '/' + 'failed/')
+def RunCore():  # Run core on the file named by the global `i`: via core.py if it exists, otherwise via core.exe.
+    if os.path.exists('core.py'):
+        os.system('python core.py' + '   "' + i + '" --number "'+getNumber(i)+'"') # launch from the .py file (source checkout); NOTE(review): a '"' in i or the number would break this hand-built quoting
+    elif os.path.exists('core.exe'):  # NOTE(review): when neither file exists nothing is run and nothing is reported
+        os.system('core.exe' + '   "' + i + '" --number "'+getNumber(i)+'"')      # launch from the .exe file (packaged EXE build)
+    elif os.path.exists('core.py') and os.path.exists('core.exe'):  # NOTE(review): unreachable - the first branch already matches whenever core.py exists
+        os.system('python core.py' + '   "' + i + '" --number "' + getNumber(i) + '"')  # launch from the .py file (source checkout)

 if __name__ =='__main__':
    print('[*]===========AV Data Capture===========')
@ -124,9 +132,7 @@ if __name__ =='__main__':
        percentage = str(count/int(count_all)*100)[:4]+'%'
        print('[!] - '+percentage+' ['+str(count)+'/'+count_all+'] -')
        print("[!]Making Data for   [" + i + "],the number is [" + getNumber(i) + "]")
-        os.system('python core.py' + '   "' + i + '" --number "'+getNumber(i)+'"') #选择从py文件启动  （用于源码py）
-        #os.system('core.exe' + '   "' + i + '" --number "'+getNumber(i)+'"')      #选择从exe文件启动（用于EXE版程序
-        #print()
+        RunCore()
        print("[*]=====================================")

    CEF('JAV_output')
--- a/config.ini
+++ b/config.ini
@ -0,0 +1,12 @@
+[proxy]
+proxy=127.0.0.1:1080
+timeout=10
+retry=3
+
+[Name_Rule]
+location_rule='JAV_output/'+actor+'/'+number
+naming_rule=number+'-'+title
+
+[update]
+update_check=0
+# update_check values: 1 = on, 0 = off
--- a/core.py
+++ b/core.py
@ -30,6 +30,9 @@ tag=[]
 cn_sub=''
 path=''
 houzhui=''
+website=''
+json_data={}
+actor_photo={}
 naming_rule  =''#eval(config['Name_Rule']['naming_rule'])
 location_rule=''#eval(config['Name_Rule']['location_rule'])

@ -63,6 +66,8 @@ def getDataFromJSON(file_number): #从JSON返回元数据
    global tag
    global image_main
    global cn_sub
+    global website
+    global actor_photo

    global naming_rule
    global location_rule
@ -70,7 +75,7 @@ def getDataFromJSON(file_number): #从JSON返回元数据
    try:    # 添加 需要 正则表达式的规则
        # =======================javdb.py=======================
        if re.search('^\d{5,}', file_number).group() in file_number:
-            json_data = json.loads(javdb.main(file_number))
+            json_data = json.loads(javbus.main_uncensored(file_number))
    except:  # 添加 无需 正则表达式的规则
        # ====================fc2fans_club.py====================
        if 'fc2' in file_number:
@ -99,6 +104,8 @@ def getDataFromJSON(file_number): #从JSON返回元数据
    imagecut =       json_data['imagecut']
    tag =        str(json_data['tag']).strip("[ ]").replace("'", '').replace(" ", '').split(',')  # 字符串转列表
    actor =      str(actor_list).strip("[ ]").replace("'", '').replace(" ", '')
+    actor_photo =    json_data['actor_photo']
+    website =        json_data['website']

    # ====================处理异常字符====================== #\/:*?"<>|
    if '\\' in title:
@ -142,7 +149,7 @@ def creatFolder(): #创建文件夹
 #=====================资源下载部分===========================
 def DownloadFileWithFilename(url,filename,path): #path = examle:photo , video.in the Project Folder!
    config = ConfigParser()
-    config.read('proxy.ini', encoding='UTF-8')
+    config.read('config.ini', encoding='UTF-8')
    proxy       = str(config['proxy']['proxy'])
    timeout     = int(config['proxy']['timeout'])
    retry_count = int(config['proxy']['retry'])
@ -184,19 +191,20 @@ def DownloadFileWithFilename(url,filename,path): #path = examle:photo , video.in
            print('[-]Image Download :  Connect retry '+str(i)+'/'+str(retry_count))
 def imageDownload(filepath): # Download the cover as fanart.jpg; if that fails, move the file to failed/ and exit
    global path
-    if DownloadFileWithFilename(cover,naming_rule+'.jpg', path) == 'failed':
+    if DownloadFileWithFilename(cover,'fanart.jpg', path) == 'failed':
        shutil.move(filepath, 'failed/')
        os._exit(0)  # terminates the process immediately with status 0
-    DownloadFileWithFilename(cover, naming_rule+'.jpg', path)
-    print('[+]Image Downloaded!', path +'/'+naming_rule+'.jpg')
+    DownloadFileWithFilename(cover, 'fanart.jpg', path)  # NOTE(review): same arguments as the check above - the cover appears to be downloaded a second time
+    print('[+]Image Downloaded!', path +'/fanart.jpg')
 def PrintFiles(filepath):
    #global path
    global title
    global cn_sub
+    global actor_photo
    try:
        if not os.path.exists(path):
            os.makedirs(path)
-        with open(path + "/" + naming_rule + ".nfo", "wt", encoding='UTF-8') as code:
+        with open(path + "/" + number + ".nfo", "wt", encoding='UTF-8') as code:
            print("<movie>", file=code)
            print(" <title>" + naming_rule + "</title>", file=code)
            print("  <set>", file=code)
@ -207,13 +215,15 @@ def PrintFiles(filepath):
            print("  <plot>"+outline+"</plot>", file=code)
            print("  <runtime>"+str(runtime).replace(" ","")+"</runtime>", file=code)
            print("  <director>" + director + "</director>", file=code)
-            print("  <poster>" + naming_rule + ".png</poster>", file=code)
-            print("  <thumb>" + naming_rule + ".png</thumb>", file=code)
-            print("  <fanart>"+naming_rule + '.jpg'+"</fanart>", file=code)
+            print("  <poster>poster.png</poster>", file=code)
+            print("  <thumb>thumb.png</thumb>", file=code)
+            print("  <fanart>fanart.jpg</fanart>", file=code)
            try:
-                for u in actor_list:
+                for key, value in actor_photo.items():
                    print("  <actor>", file=code)
-                    print("   <name>" + u + "</name>", file=code)
+                    print("   <name>" + key + "</name>", file=code)
+                    if not actor_photo == '':  # or actor_photo == []:
+                        print("   <thumb>" + value + "</thumb>", file=code)
                    print("  </actor>", file=code)
            except:
                aaaa=''
@ -237,9 +247,9 @@ def PrintFiles(filepath):
            print("  <num>" + number + "</num>", file=code)
            print("  <release>" + release + "</release>", file=code)
            print("  <cover>"+cover+"</cover>", file=code)
-            print("  <website>" + "https://www.javbus.com/"+number + "</website>", file=code)
+            print("  <website>" + website + "</website>", file=code)
            print("</movie>", file=code)
-            print("[+]Writeed!          "+path + "/" + naming_rule + ".nfo")
+            print("[+]Writeed!          "+path + "/" + number + ".nfo")
    except IOError as e:
        print("[-]Write Failed!")
        print(e)
@ -253,31 +263,32 @@ def PrintFiles(filepath):
 def cutImage():  # Write poster.png from fanart.jpg inside `path`: cropped when imagecut == 1, otherwise saved uncropped.
    if imagecut == 1:
        try:
-            img = Image.open(path + '/' + naming_rule + '.jpg')
+            img = Image.open(path + '/fanart.jpg')
            imgSize = img.size  # NOTE(review): imgSize is never read in this function
            w = img.width
            h = img.height
            img2 = img.crop((w / 1.9, 0, w, h))  # keep the region from x = w/1.9 to the right edge, full height
-            img2.save(path + '/' + naming_rule + '.png')
+            img2.save(path + '/poster.png')
        except:  # NOTE(review): bare except - the actual failure reason is discarded
            print('[-]Cover cut failed!')
    else:
-        img = Image.open(path + '/' + naming_rule + '.jpg')
+        img = Image.open(path + '/fanart.jpg')  # this branch is not wrapped in try/except, unlike the one above
        w = img.width  # NOTE(review): w and h are unused in this branch
        h = img.height
-        img.save(path + '/' + naming_rule + '.png')
+        img.save(path + '/poster.png')
 def pasteFileToFolder(filepath, path): # Rename the video to <number><extension> and move it into path (filepath: source file, path: destination)
    global houzhui
    houzhui = str(re.search('[.](AVI|RMVB|WMV|MOV|MP4|MKV|FLV|TS|avi|rmvb|wmv|mov|mp4|mkv|flv|ts)$', filepath).group())  # matched extension including the dot; NOTE(review): re.search returns None for any other extension, so .group() would raise AttributeError
-    os.rename(filepath, naming_rule + houzhui)
-    shutil.move(naming_rule + houzhui, path)
+    os.rename(filepath, number + houzhui)  # relative target name: the renamed file lands in the current working directory first
+    shutil.move(number + houzhui, path)
 def renameJpgToBackdrop_copy():  # Copy fanart.jpg to Backdrop.jpg and poster.png to thumb.png inside `path`.
-    shutil.copy(path+'/'+naming_rule + '.jpg', path+'/Backdrop.jpg')
+    shutil.copy(path+'/fanart.jpg', path+'/Backdrop.jpg')
+    shutil.copy(path + '/poster.png', path + '/thumb.png')  # requires poster.png to exist already

 if __name__ == '__main__':
    filepath=argparse_get_file()[0] #影片的路径

-    if '-c.' in filepath or '-C.' in filepath:
+    if '-c.' in filepath or '-C.' in filepath or '中文' in filepath or '字幕' in filepath:
        cn_sub='1'

    if argparse_get_file()[1] == '':    #获取手动拉去影片获取的番号
--- a/fc2fans_club.py
+++ b/fc2fans_club.py
@ -73,6 +73,8 @@ def main(number2):
        'cover':    getCover(htmlcode,number,htmlcode2),
        'imagecut': 0,
        'tag':      getTag(htmlcode),
+        'actor_photo':'',
+        'website':  'http://fc2fans.club/html/FC2-' + number + '.html',
    }
    #print(getTitle(htmlcode))
    #print(getNum(htmlcode))
--- a/javbus.py
+++ b/javbus.py
@ -13,6 +13,18 @@ from ADC_function import *
 import javdb
 import siro

+def getActorPhoto(htmlcode): # Build {element text: image src} for every 'star-name' element in htmlcode. Earlier XPath note: //*[@id="star_qdt"]/li/a/img
+    soup = BeautifulSoup(htmlcode, 'lxml')
+    a = soup.find_all(attrs={'class': 'star-name'})  # every element whose class is 'star-name'
+    d={}  # result mapping: element text -> image src string
+    for i in a:
+        l=i.a['href']  # href of the first <a> inside the element
+        t=i.get_text()  # element text, used as the dict key
+        html = etree.fromstring(get_html(l), etree.HTMLParser())  # one get_html call per element; presumably a network fetch - confirm in ADC_function
+        p=str(html.xpath('//*[@id="waterfall"]/div[1]/div/div[1]/img/@src')).strip(" ['']")  # stringify the match list, then trim spaces, brackets and quotes from both ends; '' when nothing matches
+        p2={t:p}
+        d.update(p2)  # a repeated key overwrites the earlier entry
+    return d
 def getTitle(htmlcode):  #获取标题
    doc = pq(htmlcode)
    title=str(doc('div.container h3').text()).replace(' ','-')
@ -100,17 +112,18 @@ def main(number):
            'imagecut': 1,
            'tag': getTag(htmlcode),
            'label': getSerise(htmlcode),
+            'actor_photo': getActorPhoto(htmlcode),
+            'website': 'https://www.javbus.com/' + number,
        }
        js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )  # .encode('UTF-8')
-
        if 'HEYZO' in number or 'heyzo' in number or 'Heyzo' in number:
            htmlcode = get_html('https://www.javbus.com/' + number)
-            dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
+            #dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
            dic = {
                'title': str(re.sub('\w+-\d+-', '', getTitle(htmlcode))),
                'studio': getStudio(htmlcode),
                'year': getYear(htmlcode),
-                'outline': getOutline(dww_htmlcode),
+                'outline': '',
                'runtime': getRuntime(htmlcode),
                'director': getDirector(htmlcode),
                'actor': getActor(htmlcode),
@ -120,6 +133,8 @@ def main(number):
                'imagecut': 1,
                'tag': getTag(htmlcode),
                'label': getSerise(htmlcode),
+                'actor_photo': getActorPhoto(htmlcode),
+                'website': 'https://www.javbus.com/' + number,
            }
            js2 = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4,
                             separators=(',', ':'), )  # .encode('UTF-8')
@ -132,6 +147,9 @@ def main(number):
 def main_uncensored(number):
    htmlcode = get_html('https://www.javbus.com/' + number)
    dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
+    if getTitle(htmlcode) == '':
+        htmlcode = get_html('https://www.javbus.com/' + number.replace('-','_'))
+        dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
    dic = {
        'title': str(re.sub('\w+-\d+-','',getTitle(htmlcode))).replace(getNum(htmlcode)+'-',''),
        'studio': getStudio(htmlcode),
@ -146,6 +164,8 @@ def main_uncensored(number):
        'tag': getTag(htmlcode),
        'label': getSerise(htmlcode),
        'imagecut': 0,
+        'actor_photo': '',
+        'website': 'https://www.javbus.com/' + number,
    }
    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )  # .encode('UTF-8')

--- a/javdb.py
+++ b/javdb.py
@ -56,6 +56,8 @@ def getTag(a):
 def getCover(htmlcode):  # Return the cover image src found in htmlcode, or '' when neither XPath matches.
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    result = str(html.xpath('/html/body/section/div/div[2]/div[1]/a/img/@src')).strip(" ['']")  # stringify the match list and trim spaces, brackets and quotes from both ends
+    if result == '':  # first XPath found nothing
+        result = str(html.xpath('/html/body/section/div/div[3]/div[1]/a/img/@src')).strip(" ['']")  # fallback: same path but div[3] instead of div[2]
    return result
 def getDirector(a):
    html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
@ -68,11 +70,11 @@ def getOutline(htmlcode):
    return result
 def main(number):
    try:
-        try:
-            a = get_html('https://javdb1.com/search?q=' + number + '&f=all')
+        a = get_html('https://javdb.com/search?q=' + number + '&f=all')
        html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
-        except:
-            a = get_html('https://javdb1.com/search?q=' + number.replace('-', '_') + '&f=all')
+        result1 = str(html.xpath('//*[@id="videos"]/div/div/a/@href')).strip(" ['']")
+        if result1 == '':
+            a = get_html('https://javdb.com/search?q=' + number.replace('-', '_') + '&f=all')
            html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
            result1 = str(html.xpath('//*[@id="videos"]/div/div/a/@href')).strip(" ['']")
        b = get_html('https://javdb1.com' + result1)
@ -95,17 +97,20 @@ def main(number):
            'tag': getTag(a),
            'label': getLabel(a),
            'year': getYear(getRelease(a)),  # str(re.search('\d{4}',getRelease(a)).group()),
+            'actor_photo': '',
+            'website': 'https://javdb1.com' + result1,
        }
        js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )  # .encode('UTF-8')
        return js
    except:
-        try:
        a = get_html('https://javdb.com/search?q=' + number + '&f=all')
        html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
-        except:
+        result1 = str(html.xpath('//*[@id="videos"]/div/div/a/@href')).strip(" ['']")
+        if result1 == '':
            a = get_html('https://javdb.com/search?q=' + number.replace('-', '_') + '&f=all')
            html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
            result1 = str(html.xpath('//*[@id="videos"]/div/div/a/@href')).strip(" ['']")
+
        b = get_html('https://javdb.com' + result1)
        soup = BeautifulSoup(b, 'lxml')

@ -126,6 +131,8 @@ def main(number):
            'tag': getTag(a),
            'label': getLabel(a),
            'year': getYear(getRelease(a)),  # str(re.search('\d{4}',getRelease(a)).group()),
+            'actor_photo': '',
+            'website':'https://javdb.com' + result1,
        }
        js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), )  # .encode('UTF-8')
        return js
--- a/siro.py
+++ b/siro.py
@ -95,6 +95,8 @@ def main(number2):
        'tag': getTag(a),
        'label':getLabel(a),
        'year': getYear(getRelease(a)),  # str(re.search('\d{4}',getRelease(a)).group()),
+        'actor_photo': '',
+        'website':'https://www.mgstage.com/product/product_detail/'+str(number)+'/',
    }
    js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'),)#.encode('UTF-8')
    return js