diff --git a/ADC_function.py b/ADC_function.py
index b44416a..62006fe 100644
--- a/ADC_function.py
+++ b/ADC_function.py
@@ -1,10 +1,44 @@
import requests
+from configparser import ConfigParser
+import os
+
+config = ConfigParser()
+if os.path.exists('proxy.ini'):
+ config.read('proxy.ini', encoding='UTF-8')
+else:
+ with open("proxy.ini", "wt", encoding='UTF-8') as code:
+ print("[proxy]",file=code)
+ print("proxy=127.0.0.1:1080",file=code)
def get_html(url):#网页请求核心
- headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'}
- getweb = requests.get(str(url),timeout=5,headers=headers)
- getweb.encoding='utf-8'
- try:
- return getweb.text
- except:
- print("[-]Connect Failed! Please check your Proxy.")
\ No newline at end of file
+ if not str(config['proxy']['proxy']) == '':
+ try:
+ proxies = {"http": "http://" + str(config['proxy']['proxy']),
+ "https": "https://" + str(config['proxy']['proxy'])}
+ headers = {
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3100.0 Safari/537.36'}
+ getweb = requests.get(str(url), timeout=10, headers=headers, proxies=proxies)
+ getweb.encoding = 'utf-8'
+ # print(getweb.text)
+ try:
+ return getweb.text
+ except:
+                    print('[-]Connection failed: proxy error')
+ except:
+ aaaa=''
+ #print('[-]Connect Failed.')
+
+
+ else:
+ try:
+ headers = {
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'}
+ getweb = requests.get(str(url), timeout=10, headers=headers)
+ getweb.encoding = 'utf-8'
+ try:
+ return getweb.text
+ except:
+ print("[-]Connect Failed.")
+ except:
+ aaaa = ''
+ #print('[-]Connect Failed.')
\ No newline at end of file
diff --git a/AV_Data_Capture.py b/AV_Data_Capture.py
index 6f81779..322e09b 100644
--- a/AV_Data_Capture.py
+++ b/AV_Data_Capture.py
@@ -2,6 +2,7 @@ import glob
import os
import time
import re
+import sys
def movie_lists():
#MP4
@@ -50,11 +51,11 @@ if __name__ =='__main__':
if '_' in i:
os.rename(re.search(r'[^\\/:*?"<>|\r\n]+$', i).group(), rreplace(re.search(r'[^\\/:*?"<>|\r\n]+$', i).group(), '_', '-', 1))
i = rreplace(re.search(r'[^\\/:*?"<>|\r\n]+$', i).group(), '_', '-', 1)
- os.system('python core.py' + ' "' + i + '"') #选择从py文件启动 (用于源码py)
- #os.system('core.exe' + ' "' + i + '"') #选择从exe文件启动(用于EXE版程序)
+ #os.system('python core.py' + ' "' + i + '"') #选择从py文件启动 (用于源码py)
+ os.system('core.exe' + ' "' + i + '"') #选择从exe文件启动(用于EXE版程序)
print("[*]=====================================")
print("[!]Cleaning empty folders")
CEF('JAV_output')
print("[+]All finished!!!")
- time.sleep(3)
\ No newline at end of file
+    input("[+][+]Press enter key to exit, you can check the error message before you exit.\n[+][+]按回车键结束,你可以在结束之前查看错误信息。")
\ No newline at end of file
diff --git a/core.py b/core.py
index 892ea39..adb394b 100644
--- a/core.py
+++ b/core.py
@@ -8,6 +8,8 @@ import javbus
import json
import fc2fans_club
import siro
+from ADC_function import *
+from configparser import ConfigParser
#初始化全局变量
title=''
@@ -25,22 +27,49 @@ tag=[]
#=====================资源下载部分===========================
def DownloadFileWithFilename(url,filename,path): #path = examle:photo , video.in the Project Folder!
- import requests
- try:
- if not os.path.exists(path):
- os.makedirs(path)
- r = requests.get(url)
- with open(str(path) + "/"+str(filename), "wb") as code:
- code.write(r.content)
- except IOError as e:
- print("[-]Movie not found in All website!")
- #print("[*]=====================================")
- return "failed"
- except Exception as e1:
- print(e1)
- print("[-]Download Failed2!")
- time.sleep(3)
- os._exit(0)
+ config = ConfigParser()
+ config.read('proxy.ini', encoding='UTF-8')
+ proxy = str(config['proxy']['proxy'])
+
+ if not str(config['proxy']['proxy']) == '':
+ try:
+ if not os.path.exists(path):
+ os.makedirs(path)
+ headers = {
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'}
+ r = requests.get(url,timeout=10, headers=headers,proxies={"http": "http://" + str(proxy), "https": "https://" + str(proxy)})
+ with open(str(path) + "/" + str(filename), "wb") as code:
+ code.write(r.content)
+ # print(bytes(r),file=code)
+ except IOError as e:
+ print("[-]Movie not found in All website!")
+ print("[-]" + str(filename), e)
+ # print("[*]=====================================")
+ return "failed"
+ except Exception as e1:
+ print(e1)
+ print("[-]Download Failed2!")
+ time.sleep(3)
+ os._exit(0)
+ else:
+ try:
+ if not os.path.exists(path):
+ os.makedirs(path)
+ headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'}
+ r = requests.get(url,timeout=10, headers=headers)
+ with open(str(path) + "/" + str(filename), "wb") as code:
+ code.write(r.content)
+ # print(bytes(r),file=code)
+ except IOError as e:
+ print("[-]Movie not found in All website!")
+ print("[-]" + str(filename), e)
+ # print("[*]=====================================")
+ return "failed"
+ except Exception as e1:
+ print(e1)
+ print("[-]Download Failed2!")
+ time.sleep(3)
+ os._exit(0)
def PrintFiles(path):
try:
if not os.path.exists(path):
@@ -73,7 +102,12 @@ def PrintFiles(path):
for i in tag:
print(" " + i + "", file=code)
except:
- aaaa=''
+ aaaaa=''
+ try:
+ for i in tag:
+ print(" " + i + "", file=code)
+ except:
+ aaaaaaaa=''
print(" " + number + "", file=code)
print(" " + release + "", file=code)
print(" "+cover+"", file=code)
@@ -94,6 +128,14 @@ def argparse_get_file():
parser.add_argument("file", help="Write the file path on here")
args = parser.parse_args()
return args.file
+def CreatFailedFolder():
+ if not os.path.exists('failed/'): # 新建failed文件夹
+ try:
+ os.makedirs('failed/')
+ except:
+            print("[-]Failed! Cannot create folder 'failed'\n[-](Please run as Administrator)")
+ os._exit(0)
+
def getNumberFromFilename(filepath):
global title
global studio
@@ -108,27 +150,59 @@ def getNumberFromFilename(filepath):
global imagecut
global tag
- filename = str(re.sub("\[\d{4}-\d{1,2}-\d{1,2}\] - ", "", os.path.basename(filepath)))
- print("[!]Making Data for ["+filename+"]")
- file_number = str(re.search('\w+-\w+', filename).group())
- #print(a)
+#================================================获取文件番号================================================
+ try: #试图提取番号
+ # ====番号获取主程序====
+ try: # 普通提取番号 主要处理包含减号-的番号
+ filepath.strip('22-sht.me').strip('-HD').strip('-hd')
+ filename = str(re.sub("\[\d{4}-\d{1,2}-\d{1,2}\] - ", "", filepath)) # 去除文件名中文件名
+ file_number = re.search('\w+-\d+', filename).group()
+ except: # 提取不含减号-的番号
+ try: # 提取东京热番号格式 n1087
+ filename1 = str(re.sub("h26\d", "", filepath)).strip('Tokyo-hot').strip('tokyo-hot')
+ filename0 = str(re.sub(".*?\.com-\d+", "", filename1)).strip('_')
+ file_number = str(re.search('n\d{4}', filename0).group(0))
+ except: # 提取无减号番号
+ filename1 = str(re.sub("h26\d", "", filepath)) # 去除h264/265
+ filename0 = str(re.sub(".*?\.com-\d+", "", filename1))
+ file_number2 = str(re.match('\w+', filename0).group())
+ file_number = str(file_number2.replace(re.match("^[A-Za-z]+", file_number2).group(),re.match("^[A-Za-z]+", file_number2).group() + '-'))
+ if not re.search('\w-', file_number).group() == 'None':
+ file_number = re.search('\w+-\w+', filename).group()
+ #上面是插入减号-到番号中
+ print("[!]Making Data for [" + filename + "],the number is [" + file_number + "]")
+ # ====番号获取主程序=结束===
+ except Exception as e: #番号提取异常
+ print('[-]'+str(os.path.basename(filepath))+' Cannot catch the number :')
+ print('[-]' + str(os.path.basename(filepath)) + ' :', e)
+ print('[-]Move ' + os.path.basename(filepath) + ' to failed folder')
+ shutil.move(filepath, str(os.getcwd()) + '/' + 'failed/')
+ os._exit(0)
+ except IOError as e2:
+ print('[-]' + str(os.path.basename(filepath)) + ' Cannot catch the number :')
+ print('[-]' + str(os.path.basename(filepath)) + ' :',e2)
+ print('[-]Move ' + os.path.basename(filepath) + ' to failed folder')
+ shutil.move(filepath, str(os.getcwd()) + '/' + 'failed/')
+ os._exit(0)
try:
-
-#================================================网站规则添加开始================================================
-
+# ================================================网站规则添加开始================================================
try: #添加 需要 正则表达式的规则
+ #=======================javbus.py=======================
if re.search('^\d{5,}', file_number).group() in filename:
json_data = json.loads(javbus.main_uncensored(file_number))
except: #添加 无需 正则表达式的规则
+ # ====================fc2fans_club.py===================
if 'fc2' in filename:
json_data = json.loads(fc2fans_club.main(file_number))
elif 'FC2' in filename:
json_data = json.loads(fc2fans_club.main(file_number))
+
+ #========================siro.py========================
elif 'siro' in filename:
json_data = json.loads(siro.main(file_number))
elif 'SIRO' in filename:
@@ -137,38 +211,53 @@ def getNumberFromFilename(filepath):
json_data = json.loads(siro.main(file_number))
elif '259LUXU' in filename:
json_data = json.loads(siro.main(file_number))
+ elif '300MAAN' in filename:
+ json_data = json.loads(siro.main(file_number))
+ elif '300maan' in filename:
+ json_data = json.loads(siro.main(file_number))
+ elif '326SCP' in filename:
+ json_data = json.loads(siro.main(file_number))
+ elif '326scp' in filename:
+ json_data = json.loads(siro.main(file_number))
+ elif '326URF' in filename:
+ json_data = json.loads(siro.main(file_number))
+ elif '326urf' in filename:
+ json_data = json.loads(siro.main(file_number))
+
+ #=======================javbus.py=======================
else:
json_data = json.loads(javbus.main(file_number))
+
#================================================网站规则添加结束================================================
-
-
- title = json_data['title']
- studio = json_data['studio']
- year = json_data['year']
- outline = json_data['outline']
- runtime = json_data['runtime']
+ title = json_data['title']
+ studio = json_data['studio']
+ year = json_data['year']
+ outline = json_data['outline']
+ runtime = json_data['runtime']
director = json_data['director']
- actor = str(json_data['actor']).strip("[ ]").replace("'",'').replace(" ",'').split(',')
- release = json_data['release']
- number = json_data['number']
- cover = json_data['cover']
+ actor = str(json_data['actor']).strip("[ ]").replace("'",'').replace(" ",'').split(',') #字符串转列表
+ release = json_data['release']
+ number = json_data['number']
+ cover = json_data['cover']
imagecut = json_data['imagecut']
- tag = str(json_data['tag']).strip("[ ]").replace("'",'').replace(" ",'').split(',')
- except:
- print('[-]File '+filename+'`s number can not be caught')
+ tag = str(json_data['tag']).strip("[ ]").replace("'",'').replace(" ",'').split(',') #字符串转列表
+
+
+ except IOError as e:
+ print('[-]'+str(e))
+ print('[-]Move ' + filename + ' to failed folder')
+ shutil.move(filepath, str(os.getcwd())+'/'+'failed/')
+ os._exit(0)
+
+ except Exception as e:
+ print('[-]'+str(e))
print('[-]Move ' + filename + ' to failed folder')
- if not os.path.exists('failed/'): # 新建failed文件夹
- os.makedirs('failed/')
- if not os.path.exists('failed/'):
- print("[-]failed!Dirs can not be make (Please run as Administrator)")
- time.sleep(3)
- os._exit(0)
shutil.move(filepath, str(os.getcwd())+'/'+'failed/')
os._exit(0)
@@ -177,11 +266,6 @@ path = '' #设置path为全局变量,后面移动文件要用
def creatFolder():
actor2 = str(actor).strip("[ ]").replace("'",'').replace(" ",'')
global path
- if not os.path.exists('failed/'): #新建failed文件夹
- os.makedirs('failed/')
- if not os.path.exists('failed/'):
- print("[-]failed!Dirs can not be make (Please run as Administrator)")
- os._exit(0)
if len(actor2) > 240: #新建成功输出文件夹
path = 'JAV_output' + '/' + '超多人' + '/' + number #path为影片+元数据所在目录
else:
@@ -220,6 +304,7 @@ def pasteFileToFolder(filepath, path): #文件路径,番号,后缀,要移
if __name__ == '__main__':
filepath=argparse_get_file() #影片的路径
+ CreatFailedFolder()
getNumberFromFilename(filepath) #定义番号
creatFolder() #创建文件夹
imageDownload(filepath) #creatFoder会返回番号路径
diff --git a/javbus.py b/javbus.py
index fc68aef..2b8744b 100644
--- a/javbus.py
+++ b/javbus.py
@@ -12,7 +12,7 @@ import json
def get_html(url):#网页请求核心
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'}
- getweb = requests.get(str(url),timeout=5,headers=headers).text
+ getweb = requests.get(str(url),timeout=10,headers=headers).text
try:
return getweb
except:
@@ -97,9 +97,6 @@ def main(number):
def main_uncensored(number):
htmlcode = get_html('https://www.javbus.com/' + number)
- dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
- #print('un')
- #print('https://www.javbus.com/' + number)
dic = {
'title': getTitle(htmlcode),
'studio': getStudio(htmlcode),
@@ -116,13 +113,10 @@ def main_uncensored(number):
}
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
- if getYear(htmlcode) == '':
- #print('un2')
+ if getYear(htmlcode) == '' or getYear(htmlcode) == 'null':
number2 = number.replace('-', '_')
htmlcode = get_html('https://www.javbus.com/' + number2)
- #print('https://www.javbus.com/' + number2)
- dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number2.replace("_", ''))
- dic = {
+ dic2 = {
'title': getTitle(htmlcode),
'studio': getStudio(htmlcode),
'year': getYear(htmlcode),
@@ -136,11 +130,10 @@ def main_uncensored(number):
'tag': getTag(htmlcode),
'imagecut': 0,
}
- js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
- #print(js)
- return js
- else:
- bbb=''
+ js2 = json.dumps(dic2, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
+ return js2
+
+ return js
# def return1():
diff --git a/siro.py b/siro.py
index 548d610..f7359ee 100644
--- a/siro.py
+++ b/siro.py
@@ -7,7 +7,7 @@ from bs4 import BeautifulSoup
def get_html(url):#网页请求核心
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'}
cookies = {'adc':'1'}
- getweb = requests.get(str(url),timeout=5,cookies=cookies,headers=headers).text
+ getweb = requests.get(str(url),timeout=10,cookies=cookies,headers=headers).text
try:
return getweb
except: