diff --git a/girl.py b/girl.py deleted file mode 100644 index b69d717..0000000 --- a/girl.py +++ /dev/null @@ -1,90 +0,0 @@ -# coding=utf-8 -import csv -import re -from lxml import etree -import json -from bs4 import BeautifulSoup -from ADC_function import * -import os -import datetime - -def getActorURL(htmlcode): - html = etree.fromstring(htmlcode, etree.HTMLParser()) # //table/tr[1]/td[1]/text() - result1 = html.xpath('//*[@id="waterfall"]/div/a/@href') - return result1 -# ===== -def getName(htmlcode): - html = etree.fromstring(htmlcode, etree.HTMLParser()) - result1 = str(html.xpath('//*[@id="waterfall"]/div[1]/div/div[2]/span/text()')).strip(" ['']") - return result1 -def getActorPhotoURL(htmlcode): - html = etree.fromstring(htmlcode, etree.HTMLParser()) - result1 = str(html.xpath('//*[@id="waterfall"]/div[1]/div/div[1]/img/@src')).strip(" ['']") - return result1 -def getBirthday(htmlcode): - html = etree.fromstring(htmlcode, etree.HTMLParser()) - result1 = str(html.xpath('//p[contains(text(),"生日: ")]/text()')).strip(" ['']") - return result1 -def getAge(htmlcode): - html = etree.fromstring(htmlcode, etree.HTMLParser()) - result1 = str(html.xpath('//p[contains(text(),"年齡: ")]/text()')).strip(" ['']") - return result1 -def getHigh(htmlcode): - html = etree.fromstring(htmlcode, etree.HTMLParser()) - result1 = str(html.xpath('//p[contains(text(),"身高: ")]/text()')).strip(" ['']") - return result1 -def getCup(htmlcode): - html = etree.fromstring(htmlcode, etree.HTMLParser()) - result1 = str(html.xpath('//p[contains(text(),"罩杯: ")]/text()')).strip(" ['']") - return result1 -def getInfo(htmlcode,xpath): - html = etree.fromstring(htmlcode, etree.HTMLParser()) - result1 = str(html.xpath(xpath)).strip(" ['']") - return result1 - -# ===== - -filename = '2.csv' - -def create_csv(): - path = filename - with open(path, 'w') as f: - print("名称,头像URL,个人URL,生日,年龄,身高,罩杯", file=f, ) - - -def write_csv(htmlcode, url): - path = filename - with open(path, 'a+') as f: - print(getName(htmlcode), end=',', file=f) - print(getActorPhotoURL(htmlcode), end=',', file=f) - print(url, end=',', file=f) - print(getBirthday(htmlcode).strip('生日: '), end=',', file=f) - print(getAge(htmlcode).strip('年齡: '), end=',', file=f) - print(getHigh(htmlcode).strip('身高: ').strip('cm'), end=',', file=f) - print(getCup(htmlcode).strip('罩杯: '), file=f) - -def main(url): - actor_list = getActorURL(get_html(url)) - b = 0 - c = len(actor_list) - for i in actor_list: - try: - htmlcode = get_html(i) - write_csv(htmlcode, i) - b = b + 1 - print('[' + str(b) + '/' + str(c) + ']', 'writed', getName(htmlcode)) - except: - print('error') - b = b + 1 - continue - -if os.path.exists(filename) == False: - print('create file') - create_csv() - -a = 198 -while a <= 202: - print('page:', a) - main('https://www.javbus.com/actresses/' + str(a)) - print(datetime.datetime.now().strftime("%Y.%m.%d-%H:%M:%S")) - a = a + 1