Update 1.4

2019-11-04 13:34:07 +08:00 · 2019-11-04 13:34:07 +08:00 · 71d44bf90c
commit 71d44bf90c
parent 7beeee29e4
4 changed files with 766 additions and 631 deletions
--- a/ADC_function.py
+++ b/ADC_function.py
@@ -17,10 +17,15 @@ if os.path.exists(config_file):
    except:
        print('[-]Config.ini read failed! Please use the offical file!')
 else:
-    print('[+]config.ini: not found, creating...')
+    print('[+]config.ini: not found, creating...',end='')
    with open("config.ini", "wt", encoding='UTF-8') as code:
+        print("[common]", file=code)
+        print("main_mode = 1", file=code)
+        print("failed_output_folder = failed", file=code)
+        print("success_output_folder = JAV_output", file=code)
+        print("", file=code)
        print("[proxy]",file=code)
-        print("proxy=127.0.0.1:1080",file=code)
+        print("proxy=127.0.0.1:1081",file=code)
        print("timeout=10", file=code)
        print("retry=3", file=code)
        print("", file=code)
@@ -33,16 +38,21 @@ else:
        print("", file=code)
        print("[media]", file=code)
        print("media_warehouse=emby", file=code)
-        print("#emby or plex", file=code)
-        print("#plex only test!", file=code)
+        print("#emby plex kodi", file=code)
        print("", file=code)
-        print("[directory_capture]", file=code)
-        print("switch=0", file=code)
-        print("directory=", file=code)
+        print("[escape]", file=code)
+        print("literals=\\", file=code)
        print("", file=code)
-        print("everyone switch:1=on, 0=off", file=code)
+        print("[movie_location]", file=code)
+        print("path=", file=code)
+        print("", file=code)
+        print('.',end='')
    time.sleep(2)
+    print('.')
    print('[+]config.ini: created!')
+    print('[+]Please restart the program!')
+    time.sleep(4)
+    os._exit(0)
    try:
        config.read(config_file, encoding='UTF-8')
    except:
--- a/AV_Data_Capture.py
+++ b/AV_Data_Capture.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python3
+#!/usr/bin/env python3
 # -*- coding: utf-8 -*-

 import glob
@@ -9,23 +9,24 @@ import sys
 from ADC_function import *
 import json
 import shutil
-from configparser import ConfigParser
 import fnmatch
+from configparser import ConfigParser
 os.chdir(os.getcwd())

 # ============global var===========

-version='1.3'
+version='1.4'

 config = ConfigParser()
 config.read(config_file, encoding='UTF-8')
-fromPath=config['movie']['path']
+
 Platform = sys.platform

 # ==========global var end=========

-def moveMovies(fromPath):
+def moveMovies():
    movieFiles = []
+    fromPath = config['movie_location']['path']
    if Platform == 'win32':
        movieFormat = ["avi", "rmvb", "wmv", "mov", "mp4", "mkv", "flv", "ts"]
    else:
@@ -34,13 +35,10 @@ def moveMovies(fromPath):
        movieFiles = movieFiles + [os.path.join(dirpath, f)
            for dirpath, dirnames, files in os.walk(fromPath)
            for f in fnmatch.filter(files, '*.' + fm)]
+    print(movieFiles)
    for movie in movieFiles:
-        movieName = movie.split('/')[-1]
-        print("Move file " + movieName)
-        if (os.path.exists(os.path.curdir + '/' + movieName)):
-            print(movieName + "exists, skip.")
-        else:
-            shutil.move(movie, os.path.curdir)
+        print("Move file " + movie)
+        shutil.move(movie, os.path.curdir)
 def UpdateCheck():
    if UpdateCheckSwitch() == '1':
        html2 = get_html('https://raw.githubusercontent.com/yoshiko2/AV_Data_Capture/master/update_check.json')
@@ -56,24 +54,12 @@ def UpdateCheck():
 def movie_lists():
    global exclude_directory_1
    global exclude_directory_2
-    directory = config['directory_capture']['directory']
    total=[]
    file_type = ['mp4','avi','rmvb','wmv','mov','mkv','flv','ts']
    exclude_directory_1 = config['common']['failed_output_folder']
    exclude_directory_2 = config['common']['success_output_folder']
-    if directory=='*':
-        remove_total = []
-        for o in file_type:
-            remove_total += glob.glob(r"./" + exclude_directory_1 + "/*." + o)
-            remove_total += glob.glob(r"./" + exclude_directory_2 + "/*." + o)
-        for i in os.listdir(os.getcwd()):
-            for a in file_type:
-                total += glob.glob(r"./" + i + "/*." + a)
-        for b in remove_total:
-            total.remove(b)
-        return total
    for a in file_type:
-        total += glob.glob(r"./" + directory + "/*." + a)
+        total += glob.glob(r"./*." + a)
    return total
 def CreatFailedFolder():
    if not os.path.exists('failed/'):  # 新建failed文件夹
@@ -146,7 +132,7 @@ if __name__ =='__main__':
    print('[*]=====================================')
    CreatFailedFolder()
    UpdateCheck()
-    moveMovies(fromPath)
+    moveMovies()
    os.chdir(os.getcwd())

    count = 0
--- a/core.py
+++ b/core.py
--- a/girl.py
+++ b/girl.py
@@ -0,0 +1,90 @@
+# coding=utf-8
+import csv
+import re
+from lxml import etree
+import json
+from bs4 import BeautifulSoup
+from ADC_function import *
+import os
+import datetime
+
+def getActorURL(htmlcode):
+    html = etree.fromstring(htmlcode, etree.HTMLParser())  # //table/tr[1]/td[1]/text()
+    result1 = html.xpath('//*[@id="waterfall"]/div/a/@href')
+    return result1
+# =====
+def getName(htmlcode):
+    html = etree.fromstring(htmlcode, etree.HTMLParser())
+    result1 = str(html.xpath('//*[@id="waterfall"]/div[1]/div/div[2]/span/text()')).strip(" ['']")
+    return result1
+def getActorPhotoURL(htmlcode):
+    html = etree.fromstring(htmlcode, etree.HTMLParser())
+    result1 = str(html.xpath('//*[@id="waterfall"]/div[1]/div/div[1]/img/@src')).strip(" ['']")
+    return result1
+def getBirthday(htmlcode):
+    html = etree.fromstring(htmlcode, etree.HTMLParser())
+    result1 = str(html.xpath('//p[contains(text(),"生日: ")]/text()')).strip(" ['']")
+    return result1
+def getAge(htmlcode):
+    html = etree.fromstring(htmlcode, etree.HTMLParser())
+    result1 = str(html.xpath('//p[contains(text(),"年齡: ")]/text()')).strip(" ['']")
+    return result1
+def getHigh(htmlcode):
+    html = etree.fromstring(htmlcode, etree.HTMLParser())
+    result1 = str(html.xpath('//p[contains(text(),"身高: ")]/text()')).strip(" ['']")
+    return result1
+def getCup(htmlcode):
+    html = etree.fromstring(htmlcode, etree.HTMLParser())
+    result1 = str(html.xpath('//p[contains(text(),"罩杯: ")]/text()')).strip(" ['']")
+    return result1
+def getInfo(htmlcode,xpath):
+    html = etree.fromstring(htmlcode, etree.HTMLParser())
+    result1 = str(html.xpath(xpath)).strip(" ['']")
+    return result1
+
+# =====
+
+filename = '2.csv'
+
+def create_csv():
+    path = filename
+    with open(path, 'w') as f:
+        print("名称,头像URL,个人URL,生日,年龄,身高,罩杯", file=f, )
+
+
+def write_csv(htmlcode, url):
+    path = filename
+    with open(path, 'a+') as f:
+        print(getName(htmlcode), end=',', file=f)
+        print(getActorPhotoURL(htmlcode), end=',', file=f)
+        print(url, end=',', file=f)
+        print(getBirthday(htmlcode).strip('生日: '), end=',', file=f)
+        print(getAge(htmlcode).strip('年齡: '), end=',', file=f)
+        print(getHigh(htmlcode).strip('身高: ').strip('cm'), end=',', file=f)
+        print(getCup(htmlcode).strip('罩杯: '), file=f)
+
+def main(url):
+    actor_list = getActorURL(get_html(url))
+    b = 0
+    c = len(actor_list)
+    for i in actor_list:
+        try:
+            htmlcode = get_html(i)
+            write_csv(htmlcode, i)
+            b = b + 1
+            print('[' + str(b) + '/' + str(c) + ']', 'writed', getName(htmlcode))
+        except:
+            print('error')
+            b = b + 1
+            continue
+
+if os.path.exists(filename) == False:
+    print('create file')
+    create_csv()
+
+a = 198
+while a <= 202:
+    print('page:', a)
+    main('https://www.javbus.com/actresses/' + str(a))
+    print(datetime.datetime.now().strftime("%Y.%m.%d-%H:%M:%S"))
+    a = a + 1