commit 32a19bb989
ADC_function.py
@@ -119,3 +119,18 @@ def get_html(url, cookies=None):  # core web page request
     print('[-]Connect Failed! Please check your Proxy or Network!')
 
 
+def post_html(url: str, query: dict) -> requests.Response:
+    proxy, timeout, retry_count = get_network_settings()
+
+    if proxy:
+        proxies = {"http": "http://" + proxy, "https": "https://" + proxy}
+    else:
+        proxies = {}
+
+    for i in range(retry_count):
+        try:
+            result = requests.post(url, data=query, proxies=proxies, timeout=timeout)
+            return result
+        except requests.exceptions.ProxyError:
+            print("[-]Connect retry {}/{}".format(i + 1, retry_count))
+    print("[-]Connect Failed! Please check your Proxy or Network!")
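For reference, a minimal caller sketch (hypothetical, not part of this commit). Note that post_html falls through and implicitly returns None once all retry_count attempts fail, so callers should guard against that before touching the response:

from ADC_function import post_html

# illustrative target; any form-encoded POST endpoint works the same way
response = post_html(url="https://www.jav321.com/search", query={"sn": "wmc-002"})
if response is not None:
    print(response.status_code, response.url)
else:
    print("[-]post_html gave up after exhausting its retries")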
core.py (4 changed lines)
@@ -17,6 +17,7 @@ import avsox
 import javbus
 import javdb
 import fanza
+import jav321
 import requests
 
 
@@ -57,10 +58,11 @@ def getDataFromJSON(file_number, filepath, failed_folder):  # return metadata from JSON
         "javdb": javdb.main,
         "javbus": javbus.main,
         "mgstage": mgstage.main,
+        "jav321": jav321.main,
     }
 
     # default fetch order list, from the beginning to the end
-    sources = ["javbus", "javdb", "fanza", "mgstage", "fc2", "avsox"]
+    sources = ["javbus", "javdb", "fanza", "mgstage", "fc2", "avsox", "jav321"]
 
     # if the input file name matches certain rules,
     # move some web services to the beginning of the list
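The loop that consumes func_mapping and sources sits outside this hunk. Assuming getDataFromJSON tries each scraper in order and keeps the first non-empty result (an assumption, since the loop body is not shown here), the dispatch is roughly:

import json

def fetch_metadata(file_number, func_mapping, sources):
    # hypothetical sketch: each scraper's main() returns a JSON string;
    # the first source yielding non-empty data wins
    for source in sources:
        data = json.loads(func_mapping[source](file_number))
        if data:
            return data
    return None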
jav321.py (new file, 73 lines)
@@ -0,0 +1,73 @@
+import json
+from bs4 import BeautifulSoup
+from lxml import html
+from ADC_function import post_html
+
+
+def main(number: str) -> str:
+    result = post_html(url="https://www.jav321.com/search", query={"sn": number})
+    soup = BeautifulSoup(result.text, "html.parser")
+    lx = html.fromstring(str(soup))
+
+    if "/video/" in result.url:
+        data = parse_info(soup=soup)
+        dic = {
+            "title": get_title(lx=lx),
+            "studio": "",
+            "year": data["release"][:4],
+            "outline": get_outline(lx=lx),
+            "director": "",
+            "cover": get_cover(lx=lx),
+            "imagecut": 1,
+            "actor_photo": "",
+            "website": result.url,
+            "source": "jav321.py",
+            **data,
+        }
+    else:
+        dic = {}
+
+    return json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'))
+
+
+def get_title(lx: html.HtmlElement) -> str:
+    return lx.xpath("/html/body/div[2]/div[1]/div[1]/div[1]/h3/text()")[0].strip()
+
+
+def parse_info(soup: BeautifulSoup) -> dict:
+    data = str(soup.select_one("div.row > div.col-md-9")).split("<br/>")
+
+    return {
+        "actor": get_anchor_info(h=data[0]),
+        "label": get_anchor_info(h=data[1]),
+        "tag": get_anchor_info(h=data[2]),
+        "number": get_text_info(h=data[3]),
+        "release": get_text_info(h=data[4]),
+        "runtime": get_text_info(h=data[5]),
+    }
+
+
+def get_anchor_info(h: str) -> str:
+    result = []
+
+    data = BeautifulSoup(h, "html.parser").find_all("a", href=True)
+    for d in data:
+        result.append(d.text)
+
+    return ",".join(result)
+
+
+def get_text_info(h: str) -> str:
+    return h.split(": ")[1]
+
+
+def get_cover(lx: html.HtmlElement) -> str:
+    return lx.xpath("/html/body/div[2]/div[2]/div[1]/p/a/img/@src")[0]
+
+
+def get_outline(lx: html.HtmlElement) -> str:
+    return lx.xpath("/html/body/div[2]/div[1]/div[1]/div[2]/div[3]/div/text()")[0]
+
+
+if __name__ == "__main__":
+    print(main("wmc-002"))
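A quick offline check of the string-level helpers (illustrative only; the sample fragment mimics the "<br/>"-separated block that parse_info slices positionally, which is also why any layout change on jav321.com breaks this parser):

from jav321 import get_anchor_info, get_text_info

# joins the text of every <a href> in the fragment
print(get_anchor_info(h='<a href="/star/1">Actor A</a><a href="/star/2">Actor B</a>'))  # Actor A,Actor B
# takes whatever follows the first ": " separator
print(get_text_info(h="Release date: 2019-07-27"))  # 2019-07-27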