Add support jav321

2020-03-19 22:41:55 +09:00 · 2020-03-19 22:41:55 +09:00 · 9a530f4e46
commit 9a530f4e46
parent 780e47ffba
3 changed files with 91 additions and 1 deletions
--- a/ADC_function.py
+++ b/ADC_function.py
@ -119,3 +119,18 @@ def get_html(url,cookies = None):#网页请求核心
    print('[-]Connect Failed! Please check your Proxy or Network!')


+def post_html(url: str, query: dict) -> requests.Response:
+    proxy, timeout, retry_count = get_network_settings()
+
+    if proxy:
+        proxies = {"http": "http://" + proxy, "https": "https://" + proxy}
+    else:
+        proxies = {}
+
+    for i in range(retry_count):
+        try:
+            result = requests.post(url, data=query, proxies=proxies)
+            return result
+        except requests.exceptions.ProxyError:
+            print("[-]Connect retry {}/{}".format(i+1, retry_count))
+    print("[-]Connect Failed! Please check your Proxy or Network!")
--- a/core.py
+++ b/core.py
@ -17,6 +17,7 @@ import avsox
 import javbus
 import javdb
 import fanza
+import jav321
 import requests


@ -57,10 +58,11 @@ def getDataFromJSON(file_number, filepath, failed_folder):  # 从JSON返回元
        "javdb": javdb.main,
        "javbus": javbus.main,
        "mgstage": mgstage.main,
+        "jav321": jav321.main,
    }

    # default fetch order list, from the begining to the end
-    sources = ["javbus", "javdb", "fanza", "mgstage", "fc2",  "avsox"]
+    sources = ["javbus", "javdb", "fanza", "mgstage", "fc2",  "avsox", "jav321"]

    # if the input file name matches centain rules,
    # move some web service to the begining of the list
--- a/jav321.py
+++ b/jav321.py
@ -0,0 +1,73 @@
+import json
+from bs4 import BeautifulSoup
+from lxml import html
+from ADC_function import post_html
+
+
+def main(number: str) -> json:
+    result = post_html(url="https://www.jav321.com/search", query={"sn": number})
+    soup = BeautifulSoup(result.text, "html.parser")
+    lx = html.fromstring(str(soup))
+
+    if "/video/" in result.url:
+        data = parse_info(soup=soup)
+        dic = {
+            "title": get_title(lx=lx),
+            "studio": "",
+            "year": data["release"][:4],
+            "outline": get_outline(lx=lx),
+            "director": "",
+            "cover": get_cover(lx=lx),
+            "imagecut": 1,
+            "actor_photo": "",
+            "website": result.url,
+            "source": "jav321.py",
+            **data,
+        }
+    else:
+        dic = {}
+
+    return json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'))
+
+
+def get_title(lx: html.HtmlElement) -> str:
+    return lx.xpath("/html/body/div[2]/div[1]/div[1]/div[1]/h3/text()")[0].strip()
+
+
+def parse_info(soup: BeautifulSoup) -> dict:
+    data = str(soup.select_one("div.row > div.col-md-9")).split("<br/>")
+
+    return {
+        "actor": get_anchor_info(h=data[0]),
+        "label": get_anchor_info(h=data[1]),
+        "tag": get_anchor_info(h=data[2]),
+        "number": get_text_info(h=data[3]),
+        "release": get_text_info(h=data[4]),
+        "runtime": get_text_info(h=data[5]),
+    }
+
+
+def get_anchor_info(h: str) -> str:
+    result = []
+
+    data = BeautifulSoup(h, "html.parser").find_all("a", href=True)
+    for d in data:
+        result.append(d.text)
+
+    return ",".join(result)
+
+
+def get_text_info(h: str) -> str:
+    return h.split(": ")[1]
+
+
+def get_cover(lx: html.HtmlElement) -> str:
+    return lx.xpath("/html/body/div[2]/div[2]/div[1]/p/a/img/@src")[0]
+
+
+def get_outline(lx: html.HtmlElement) -> str:
+    return lx.xpath("/html/body/div[2]/div[1]/div[1]/div[2]/div[3]/div/text()")[0]
+
+
+if __name__ == "__main__":
+    print(main("wmc-002"))