Beta 10.2 Update
This commit is contained in:
parent
36c436772c
commit
764fba74ec
@ -1,8 +1,15 @@
|
|||||||
import requests
|
import requests
|
||||||
from configparser import ConfigParser
|
from configparser import RawConfigParser
|
||||||
import os
|
import os
|
||||||
|
import re
|
||||||
|
|
||||||
config = ConfigParser()
|
# content = open('proxy.ini').read()
|
||||||
|
# content = re.sub(r"\xfe\xff","", content)
|
||||||
|
# content = re.sub(r"\xff\xfe","", content)
|
||||||
|
# content = re.sub(r"\xef\xbb\xbf","", content)
|
||||||
|
# open('BaseConfig.cfg', 'w').write(content)
|
||||||
|
|
||||||
|
config = RawConfigParser()
|
||||||
if os.path.exists('proxy.ini'):
|
if os.path.exists('proxy.ini'):
|
||||||
config.read('proxy.ini', encoding='UTF-8')
|
config.read('proxy.ini', encoding='UTF-8')
|
||||||
else:
|
else:
|
||||||
@ -10,14 +17,14 @@ else:
|
|||||||
print("[proxy]",file=code)
|
print("[proxy]",file=code)
|
||||||
print("proxy=127.0.0.1:1080",file=code)
|
print("proxy=127.0.0.1:1080",file=code)
|
||||||
|
|
||||||
def get_html(url):#网页请求核心
|
def get_html(url,cookies = None):#网页请求核心
|
||||||
if not str(config['proxy']['proxy']) == '':
|
if not str(config['proxy']['proxy']) == '':
|
||||||
proxies = {
|
proxies = {
|
||||||
"http" : "http://" + str(config['proxy']['proxy']),
|
"http" : "http://" + str(config['proxy']['proxy']),
|
||||||
"https": "https://" + str(config['proxy']['proxy'])
|
"https": "https://" + str(config['proxy']['proxy'])
|
||||||
}
|
}
|
||||||
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3100.0 Safari/537.36'}
|
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3100.0 Safari/537.36'}
|
||||||
getweb = requests.get(str(url), headers=headers, proxies=proxies)
|
getweb = requests.get(str(url), headers=headers, proxies=proxies,cookies=cookies)
|
||||||
getweb.encoding = 'utf-8'
|
getweb.encoding = 'utf-8'
|
||||||
# print(getweb.text)
|
# print(getweb.text)
|
||||||
try:
|
try:
|
||||||
@ -27,7 +34,7 @@ def get_html(url):#网页请求核心
|
|||||||
else:
|
else:
|
||||||
headers = {
|
headers = {
|
||||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'}
|
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'}
|
||||||
getweb = requests.get(str(url), headers=headers)
|
getweb = requests.get(str(url), headers=headers,cookies=cookies)
|
||||||
getweb.encoding = 'utf-8'
|
getweb.encoding = 'utf-8'
|
||||||
try:
|
try:
|
||||||
return getweb.text
|
return getweb.text
|
||||||
|
@ -19,8 +19,10 @@ def movie_lists():
|
|||||||
f2 = glob.glob(os.getcwd() + r"\*.mkv")
|
f2 = glob.glob(os.getcwd() + r"\*.mkv")
|
||||||
# FLV
|
# FLV
|
||||||
g2 = glob.glob(os.getcwd() + r"\*.flv")
|
g2 = glob.glob(os.getcwd() + r"\*.flv")
|
||||||
|
# TS
|
||||||
|
h2 = glob.glob(os.getcwd() + r"\*.ts")
|
||||||
|
|
||||||
total = a2+b2+c2+d2+e2+f2+g2
|
total = a2+b2+c2+d2+e2+f2+g2+h2
|
||||||
return total
|
return total
|
||||||
|
|
||||||
def lists_from_test(custom_nuber): #电影列表
|
def lists_from_test(custom_nuber): #电影列表
|
||||||
|
2
core.py
2
core.py
@ -299,7 +299,7 @@ def cutImage():
|
|||||||
h = img.height
|
h = img.height
|
||||||
img.save(path + '/' + naming_rule + '.png')
|
img.save(path + '/' + naming_rule + '.png')
|
||||||
def pasteFileToFolder(filepath, path):  # source file path, destination folder
    """Rename a video file to ``naming_rule`` + its extension and move it to ``path``.

    The extension is taken from the end of ``filepath`` and must be one of
    the supported video suffixes.  Matching is case-insensitive, so mixed-case
    suffixes such as ``.Mp4`` are accepted; the suffix is reused exactly as it
    appears in ``filepath``.

    Relies on the module-level ``naming_rule`` for the new base name.

    Raises:
        AttributeError: if ``filepath`` does not end in a supported suffix
            (``re.search`` returns ``None``), as in the original code.
    """
    suffix_match = re.search(
        r'[.](avi|rmvb|wmv|mov|mp4|mkv|flv|ts)$', filepath, re.IGNORECASE)
    houzhui = str(suffix_match.group())
    # Rename in place first, then move the renamed file to the destination.
    os.rename(filepath, naming_rule + houzhui)
    shutil.move(naming_rule + houzhui, path)
|
||||||
|
|
||||||
|
@ -38,8 +38,8 @@ def getOutline(htmlcode,number): #获取番号
|
|||||||
# result = str(html.xpath('//*[@id="container"]/div[1]/div/article/section[4]/p/text()')).replace("\\n",'',10000).strip(" ['']").replace("'",'',10000)
|
# result = str(html.xpath('//*[@id="container"]/div[1]/div/article/section[4]/p/text()')).replace("\\n",'',10000).strip(" ['']").replace("'",'',10000)
|
||||||
# return result
|
# return result
|
||||||
|
|
||||||
def main(number):
|
def main(number2):
|
||||||
str(number).lstrip("FC2-").lstrip("fc2-").lstrip("fc2_").lstrip("fc2-")
|
number=number2.replace('PPV','').replace('ppv','')
|
||||||
htmlcode = ADC_function.get_html('http://fc2fans.club/html/FC2-' + number + '.html')
|
htmlcode = ADC_function.get_html('http://fc2fans.club/html/FC2-' + number + '.html')
|
||||||
dic = {
|
dic = {
|
||||||
'title': getTitle(htmlcode),
|
'title': getTitle(htmlcode),
|
||||||
|
35
javbus.py
35
javbus.py
@ -9,14 +9,7 @@ from bs4 import BeautifulSoup#need install
|
|||||||
from PIL import Image#need install
|
from PIL import Image#need install
|
||||||
import time
|
import time
|
||||||
import json
|
import json
|
||||||
|
from ADC_function import *
|
||||||
def get_html(url):#网页请求核心
|
|
||||||
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'}
|
|
||||||
getweb = requests.get(str(url),timeout=10,headers=headers).text
|
|
||||||
try:
|
|
||||||
return getweb
|
|
||||||
except:
|
|
||||||
print("[-]Connect Failed! Please check your Proxy.")
|
|
||||||
|
|
||||||
def getTitle(htmlcode): #获取标题
|
def getTitle(htmlcode): #获取标题
|
||||||
doc = pq(htmlcode)
|
doc = pq(htmlcode)
|
||||||
@ -34,7 +27,6 @@ def getCover(htmlcode): #获取封面链接
|
|||||||
doc = pq(htmlcode)
|
doc = pq(htmlcode)
|
||||||
image = doc('a.bigImage')
|
image = doc('a.bigImage')
|
||||||
return image.attr('href')
|
return image.attr('href')
|
||||||
print(image.attr('href'))
|
|
||||||
def getRelease(htmlcode): #获取出版日期
|
def getRelease(htmlcode): #获取出版日期
|
||||||
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
html = etree.fromstring(htmlcode, etree.HTMLParser())
|
||||||
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[2]/text()')).strip(" ['']")
|
result = str(html.xpath('/html/body/div[5]/div[1]/div[2]/p[2]/text()')).strip(" ['']")
|
||||||
@ -62,8 +54,10 @@ def getOutline(htmlcode): #获取演员
|
|||||||
doc = pq(htmlcode)
|
doc = pq(htmlcode)
|
||||||
result = str(doc('tr td div.mg-b20.lh4 p.mg-b20').text())
|
result = str(doc('tr td div.mg-b20.lh4 p.mg-b20').text())
|
||||||
return result
|
return result
|
||||||
|
def getSerise(htmlcode):
    """Return the link text found at a fixed position in the detail page.

    The XPath result list is stringified and the list punctuation
    (spaces, brackets, single quotes) is trimmed from both ends.
    """
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    nodes = tree.xpath('/html/body/div[5]/div[1]/div[2]/p[7]/a/text()')
    return str(nodes).strip(" ['']")
|
||||||
def getTag(htmlcode): # 获取演员
|
def getTag(htmlcode): # 获取演员
|
||||||
tag = []
|
tag = []
|
||||||
soup = BeautifulSoup(htmlcode, 'lxml')
|
soup = BeautifulSoup(htmlcode, 'lxml')
|
||||||
@ -79,7 +73,7 @@ def main(number):
|
|||||||
htmlcode=get_html('https://www.javbus.com/'+number)
|
htmlcode=get_html('https://www.javbus.com/'+number)
|
||||||
dww_htmlcode=get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
|
dww_htmlcode=get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
|
||||||
dic = {
|
dic = {
|
||||||
'title': getTitle(htmlcode),
|
'title': str(re.sub('\w+-\d+-','',getTitle(htmlcode))),
|
||||||
'studio': getStudio(htmlcode),
|
'studio': getStudio(htmlcode),
|
||||||
'year': str(re.search('\d{4}',getYear(htmlcode)).group()),
|
'year': str(re.search('\d{4}',getYear(htmlcode)).group()),
|
||||||
'outline': getOutline(dww_htmlcode),
|
'outline': getOutline(dww_htmlcode),
|
||||||
@ -90,7 +84,8 @@ def main(number):
|
|||||||
'number': getNum(htmlcode),
|
'number': getNum(htmlcode),
|
||||||
'cover': getCover(htmlcode),
|
'cover': getCover(htmlcode),
|
||||||
'imagecut': 1,
|
'imagecut': 1,
|
||||||
'tag': getTag(htmlcode)
|
'tag': getTag(htmlcode),
|
||||||
|
'label': getSerise(htmlcode),
|
||||||
}
|
}
|
||||||
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'),)#.encode('UTF-8')
|
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'),)#.encode('UTF-8')
|
||||||
|
|
||||||
@ -98,7 +93,7 @@ def main(number):
|
|||||||
htmlcode = get_html('https://www.javbus.com/' + number)
|
htmlcode = get_html('https://www.javbus.com/' + number)
|
||||||
dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
|
dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
|
||||||
dic = {
|
dic = {
|
||||||
'title': getTitle(htmlcode),
|
'title': str(re.sub('\w+-\d+-','',getTitle(htmlcode))),
|
||||||
'studio': getStudio(htmlcode),
|
'studio': getStudio(htmlcode),
|
||||||
'year': getYear(htmlcode),
|
'year': getYear(htmlcode),
|
||||||
'outline': getOutline(dww_htmlcode),
|
'outline': getOutline(dww_htmlcode),
|
||||||
@ -109,7 +104,8 @@ def main(number):
|
|||||||
'number': getNum(htmlcode),
|
'number': getNum(htmlcode),
|
||||||
'cover': getCover(htmlcode),
|
'cover': getCover(htmlcode),
|
||||||
'imagecut': 1,
|
'imagecut': 1,
|
||||||
'tag': getTag(htmlcode)
|
'tag': getTag(htmlcode),
|
||||||
|
'label': getSerise(htmlcode),
|
||||||
}
|
}
|
||||||
js2 = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
|
js2 = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
|
||||||
return js2
|
return js2
|
||||||
@ -118,11 +114,12 @@ def main(number):
|
|||||||
|
|
||||||
def main_uncensored(number):
|
def main_uncensored(number):
|
||||||
htmlcode = get_html('https://www.javbus.com/' + number)
|
htmlcode = get_html('https://www.javbus.com/' + number)
|
||||||
|
dww_htmlcode = get_html("https://www.dmm.co.jp/mono/dvd/-/detail/=/cid=" + number.replace("-", ''))
|
||||||
dic = {
|
dic = {
|
||||||
'title': getTitle(htmlcode),
|
'title': str(re.sub('\w+-\d+-','',getTitle(htmlcode))),
|
||||||
'studio': getStudio(htmlcode),
|
'studio': getStudio(htmlcode),
|
||||||
'year': getYear(htmlcode),
|
'year': getYear(htmlcode),
|
||||||
'outline': getOutline(htmlcode),
|
'outline': getOutline(dww_htmlcode),
|
||||||
'runtime': getRuntime(htmlcode),
|
'runtime': getRuntime(htmlcode),
|
||||||
'director': getDirector(htmlcode),
|
'director': getDirector(htmlcode),
|
||||||
'actor': getActor(htmlcode),
|
'actor': getActor(htmlcode),
|
||||||
@ -130,6 +127,7 @@ def main_uncensored(number):
|
|||||||
'number': getNum(htmlcode),
|
'number': getNum(htmlcode),
|
||||||
'cover': getCover(htmlcode),
|
'cover': getCover(htmlcode),
|
||||||
'tag': getTag(htmlcode),
|
'tag': getTag(htmlcode),
|
||||||
|
'label': getSerise(htmlcode),
|
||||||
'imagecut': 0,
|
'imagecut': 0,
|
||||||
}
|
}
|
||||||
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
|
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
|
||||||
@ -138,7 +136,7 @@ def main_uncensored(number):
|
|||||||
number2 = number.replace('-', '_')
|
number2 = number.replace('-', '_')
|
||||||
htmlcode = get_html('https://www.javbus.com/' + number2)
|
htmlcode = get_html('https://www.javbus.com/' + number2)
|
||||||
dic2 = {
|
dic2 = {
|
||||||
'title': getTitle(htmlcode),
|
'title': str(re.sub('\w+-\d+-','',getTitle(htmlcode))),
|
||||||
'studio': getStudio(htmlcode),
|
'studio': getStudio(htmlcode),
|
||||||
'year': getYear(htmlcode),
|
'year': getYear(htmlcode),
|
||||||
'outline': '',
|
'outline': '',
|
||||||
@ -149,6 +147,7 @@ def main_uncensored(number):
|
|||||||
'number': getNum(htmlcode),
|
'number': getNum(htmlcode),
|
||||||
'cover': getCover(htmlcode),
|
'cover': getCover(htmlcode),
|
||||||
'tag': getTag(htmlcode),
|
'tag': getTag(htmlcode),
|
||||||
|
'label':getSerise(htmlcode),
|
||||||
'imagecut': 0,
|
'imagecut': 0,
|
||||||
}
|
}
|
||||||
js2 = json.dumps(dic2, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
|
js2 = json.dumps(dic2, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'), ) # .encode('UTF-8')
|
||||||
|
69
siro.py
69
siro.py
@ -3,70 +3,74 @@ from lxml import etree
|
|||||||
import json
|
import json
|
||||||
import requests
|
import requests
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
from ADC_function import *
|
||||||
def get_html(url):#网页请求核心
|
|
||||||
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'}
|
|
||||||
cookies = {'adc':'1'}
|
|
||||||
getweb = requests.get(str(url),timeout=10,cookies=cookies,headers=headers).text
|
|
||||||
try:
|
|
||||||
return getweb
|
|
||||||
except:
|
|
||||||
print("[-]Connect Failed! Please check your Proxy.")
|
|
||||||
|
|
||||||
def getTitle(a):
    """Return the ``h1`` text inside ``#center_column`` of the page HTML ``a``.

    The XPath result list is stringified and the list punctuation
    (spaces, brackets, single quotes) is trimmed from both ends.
    """
    tree = etree.fromstring(a, etree.HTMLParser())
    heading = tree.xpath('//*[@id="center_column"]/div[2]/h1/text()')
    return str(heading).strip(" ['']")
|
||||||
def getActor(a):
    """Return the text of the first cell in table row 1 of the HTML ``a``.

    Both the linked text (inside ``<a>``) and the plain cell text are read;
    the linked part comes first in the returned string.
    """
    # Characters trimmed from both ends of each stringified XPath result:
    # space, brackets, single quote, backslash and the letter "n".
    trim_chars = " ['\\n ']"
    tree = etree.fromstring(a, etree.HTMLParser())
    linked = str(tree.xpath('//table/tr[1]/td[1]/a/text()')).strip(trim_chars)
    plain = str(tree.xpath('//table/tr[1]/td[1]/text()')).strip(trim_chars)
    combined = linked + plain
    return combined.strip('+')
|
||||||
def getStudio(a):
    """Return the text scraped from table row 2 of the HTML ``a``.

    The linked text (inside ``<a>``) comes first, followed by the plain
    cell text.
    """
    # Characters trimmed from both ends of each stringified XPath result:
    # space, brackets, single quote, backslash and the letter "n".
    trim_chars = " ['\\n ']"
    tree = etree.fromstring(a, etree.HTMLParser())
    linked = str(tree.xpath('//table/tr[2]/td[1]/a/text()')).strip(trim_chars)
    # NOTE(review): this query targets table[2] while the linked-text query
    # targets any table -- looks unintentional; confirm against the page.
    plain = str(tree.xpath('//table[2]/tr[2]/td/text()')).strip(trim_chars)
    combined = linked + plain
    return combined.strip('+')
|
||||||
def getRuntime(a):
    """Return the text of the first cell in table row 3 of the HTML ``a``.

    The linked text (inside ``<a>``) comes first, followed by the plain
    cell text.  Leading/trailing ``m`` and ``i`` characters are removed at
    the end; because the first trim set already contains ``n``, a value
    such as ``120min`` ends up as ``120``.
    """
    # Characters trimmed from both ends of each stringified XPath result:
    # space, brackets, single quote, backslash and the letter "n".
    trim_chars = " ['\\n ']"
    tree = etree.fromstring(a, etree.HTMLParser())
    linked = str(tree.xpath('//table/tr[3]/td[1]/a/text()')).strip(trim_chars)
    plain = str(tree.xpath('//table/tr[3]/td[1]/text()')).strip(trim_chars)
    combined = (linked + plain).strip('+')
    return combined.strip('mi')
|
||||||
|
def getLabel(a):
    """Return the text of the first cell in table row 6 of the HTML ``a``.

    The linked text (inside ``<a>``) comes first, followed by the plain
    cell text.
    """
    # Characters trimmed from both ends of each stringified XPath result:
    # space, brackets, single quote, backslash and the letter "n".
    trim_chars = " ['\\n ']"
    tree = etree.fromstring(a, etree.HTMLParser())
    linked = str(tree.xpath('//table/tr[6]/td[1]/a/text()')).strip(trim_chars)
    plain = str(tree.xpath('//table/tr[6]/td[1]/text()')).strip(trim_chars)
    combined = linked + plain
    return combined.strip('+')
|
||||||
def getNum(a):
    """Return the text of the fourth cell in table row 2 of the HTML ``a``.

    The plain cell text comes first, followed by the linked text
    (inside ``<a>``).
    """
    # Characters trimmed from both ends of each stringified XPath result:
    # space, brackets, single quote, backslash and the letter "n".
    trim_chars = " ['\\n ']"
    tree = etree.fromstring(a, etree.HTMLParser())
    plain = str(tree.xpath('//table/tr[2]/td[4]/text()')).strip(trim_chars)
    linked = str(tree.xpath('//table/tr[2]/td[4]/a/text()')).strip(trim_chars)
    combined = plain + linked
    return combined.strip('+')
|
||||||
def getYear(a):
    """Return the text of the fifth cell in table row 2 of the HTML ``a``.

    The linked text (inside ``<a>``) comes first, followed by the plain
    cell text.
    """
    # Characters trimmed from both ends of each stringified XPath result:
    # space, brackets, single quote, backslash and the letter "n".
    trim_chars = " ['\\n ']"
    tree = etree.fromstring(a, etree.HTMLParser())
    linked = str(tree.xpath('//table/tr[2]/td[5]/a/text()')).strip(trim_chars)
    plain = str(tree.xpath('//table/tr[2]/td[5]/text()')).strip(trim_chars)
    return linked + plain
|
||||||
def getRelease(a):
    """Return the text of the first cell in table row 5 of the HTML ``a``.

    The linked text (inside ``<a>``) comes first, followed by the plain
    cell text.

    The linked-text query previously read ``//table/tr[5]/a/td[1]/text()``,
    which places ``<a>`` between ``<tr>`` and ``<td>`` and therefore never
    matched; it now follows the same ``td[1]/a`` shape as the sibling
    getters.
    """
    # Characters trimmed from both ends of each stringified XPath result:
    # space, brackets, single quote, backslash and the letter "n".
    trim_chars = " ['\\n ']"
    html = etree.fromstring(a, etree.HTMLParser())
    result2 = str(html.xpath('//table/tr[5]/td[1]/text()')).strip(trim_chars)
    result1 = str(html.xpath('//table/tr[5]/td[1]/a/text()')).strip(trim_chars)
    return str(result1 + result2).strip('+')
|
||||||
def getTag(a):
    """Return the text of the first cell in table row 8 of the HTML ``a``.

    The plain cell text comes first, followed by the linked text
    (inside ``<a>``).  Each part is a stringified XPath result list, so
    multiple matches keep their ``', '`` list separators.
    """
    # Characters trimmed from both ends of each stringified XPath result:
    # space, brackets, single quote, backslash and the letter "n".
    trim_chars = " ['\\n ']"
    tree = etree.fromstring(a, etree.HTMLParser())
    plain = str(tree.xpath('//table/tr[8]/td[1]/text()')).strip(trim_chars)
    linked = str(tree.xpath('//table/tr[8]/td[1]/a/text()')).strip(trim_chars)
    combined = plain + linked
    return combined.strip('+')
|
||||||
def getCover(htmlcode):
    """Return the ``src`` of the image under ``#center_column`` in ``htmlcode``.

    The XPath result list is stringified and the list punctuation
    (spaces, brackets, single quotes) is trimmed from both ends.
    """
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    sources = tree.xpath(
        '//*[@id="center_column"]/div[2]/div[1]/div/div/h2/img/@src')
    return str(sources).strip(" ['']")
|
||||||
def getDirector(a):
    """Return the text of the first cell in table row 2 of the HTML ``a``.

    The plain cell text comes first, followed by the linked text
    (inside ``<a>``).
    """
    # Characters trimmed from both ends of each stringified XPath result:
    # space, brackets, single quote, backslash and the letter "n".
    trim_chars = " ['\\n ']"
    tree = etree.fromstring(a, etree.HTMLParser())
    plain = str(tree.xpath('//table/tr[2]/td[1]/text()')).strip(trim_chars)
    linked = str(tree.xpath('//table/tr[2]/td[1]/a/text()')).strip(trim_chars)
    combined = plain + linked
    return combined.strip('+')
|
||||||
def getOutline(htmlcode):
    """Return the first paragraph text under ``#introduction`` in ``htmlcode``.

    The XPath result list is stringified and the list punctuation
    (spaces, brackets, single quotes) is trimmed from both ends.
    """
    tree = etree.fromstring(htmlcode, etree.HTMLParser())
    paragraphs = tree.xpath('//*[@id="introduction"]/dd/p[1]/text()')
    return str(paragraphs).strip(" ['']")
|
||||||
|
|
||||||
def main(number):
|
def main(number):
|
||||||
htmlcode=get_html('https://www.mgstage.com/product/product_detail/'+str(number))
|
htmlcode=get_html('https://www.mgstage.com/product/product_detail/'+str(number),cookies={'adc':'1'})
|
||||||
soup = BeautifulSoup(htmlcode, 'lxml')
|
soup = BeautifulSoup(htmlcode, 'lxml')
|
||||||
a = str(soup.find(attrs={'class': 'detail_data'})).replace('\n ','')
|
a = str(soup.find(attrs={'class': 'detail_data'})).replace('\n ','')
|
||||||
|
#print(a)
|
||||||
dic = {
|
dic = {
|
||||||
'title': getTitle(htmlcode).replace("\\n",'').replace(' ',''),
|
'title': getTitle(htmlcode).replace("\\n",'').replace(' ',''),
|
||||||
'studio': getStudio(a),
|
'studio': getStudio(a),
|
||||||
'year': getYear(a),
|
'year': str(re.search('\d{4}',getRelease(a)).group()),
|
||||||
'outline': getOutline(htmlcode),
|
'outline': getOutline(htmlcode),
|
||||||
'runtime': getRuntime(a),
|
'runtime': getRuntime(a),
|
||||||
'director': getDirector(a),
|
'director': getDirector(a),
|
||||||
@ -75,7 +79,10 @@ def main(number):
|
|||||||
'number': number,
|
'number': number,
|
||||||
'cover': getCover(htmlcode),
|
'cover': getCover(htmlcode),
|
||||||
'imagecut': 0,
|
'imagecut': 0,
|
||||||
'tag':' ',
|
'tag': getTag(a).replace("'\\n',",'').replace(' ', '').replace("\\n','\\n",','),
|
||||||
|
'label':getLabel(a)
|
||||||
}
|
}
|
||||||
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'),)#.encode('UTF-8')
|
js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'),)#.encode('UTF-8')
|
||||||
|
#print('https://www.mgstage.com/product/product_detail/'+str(number))
|
||||||
return js
|
return js
|
||||||
|
#print(main('SIRO-3552'))
|
Loading…
Reference in New Issue
Block a user