Delete girl.py
This commit is contained in:
parent
a8255adbb8
commit
11a9ab6b51
90
girl.py
90
girl.py
@ -1,90 +0,0 @@
|
|||||||
# coding=utf-8
|
|
||||||
import csv
|
|
||||||
import re
|
|
||||||
from lxml import etree
|
|
||||||
import json
|
|
||||||
from bs4 import BeautifulSoup
|
|
||||||
from ADC_function import *
|
|
||||||
import os
|
|
||||||
import datetime
|
|
||||||
|
|
||||||
def getActorURL(htmlcode):
    """Return the link targets found inside the ``waterfall`` container.

    *htmlcode* is parsed with lxml's HTML parser; the result is the list of
    ``href`` attribute values of every ``<a>`` that is a direct child of a
    ``<div>`` child of the element whose id is ``waterfall``.
    """
    parser = etree.HTMLParser()
    document = etree.fromstring(htmlcode, parser)
    return document.xpath('//*[@id="waterfall"]/div/a/@href')
|
|
||||||
# =====
|
|
||||||
def getName(htmlcode):
    """Return the span text at ``div[1]/div/div[2]`` of the waterfall container.

    Delegates to ``getInfo``, which parses *htmlcode* and flattens the XPath
    result into a single string.
    """
    return getInfo(htmlcode, '//*[@id="waterfall"]/div[1]/div/div[2]/span/text()')
|
|
||||||
def getActorPhotoURL(htmlcode):
    """Return the ``src`` of the image at ``div[1]/div/div[1]`` of the waterfall container.

    Delegates to ``getInfo``, which parses *htmlcode* and flattens the XPath
    result into a single string.
    """
    return getInfo(htmlcode, '//*[@id="waterfall"]/div[1]/div/div[1]/img/@src')
|
|
||||||
def getBirthday(htmlcode):
    """Return the text of the ``<p>`` whose text contains the birthday label.

    The returned string still carries the label itself; callers remove it.
    Delegates to ``getInfo`` for parsing and flattening the XPath result.
    """
    return getInfo(htmlcode, '//p[contains(text(),"生日: ")]/text()')
|
|
||||||
def getAge(htmlcode):
    """Return the text of the ``<p>`` whose text contains the age label.

    The returned string still carries the label itself; callers remove it.
    Delegates to ``getInfo`` for parsing and flattening the XPath result.
    """
    return getInfo(htmlcode, '//p[contains(text(),"年齡: ")]/text()')
|
|
||||||
def getHigh(htmlcode):
    """Return the text of the ``<p>`` whose text contains the height label.

    The returned string still carries the label itself; callers remove it.
    Delegates to ``getInfo`` for parsing and flattening the XPath result.
    """
    return getInfo(htmlcode, '//p[contains(text(),"身高: ")]/text()')
|
|
||||||
def getCup(htmlcode):
    """Return the text of the ``<p>`` whose text contains the cup-size label.

    The returned string still carries the label itself; callers remove it.
    Delegates to ``getInfo`` for parsing and flattening the XPath result.
    """
    return getInfo(htmlcode, '//p[contains(text(),"罩杯: ")]/text()')
|
|
||||||
def getInfo(htmlcode,xpath):
    """Evaluate *xpath* against *htmlcode* and return the matched text.

    Args:
        htmlcode: HTML source to parse with lxml's HTML parser.
        xpath: XPath expression to evaluate on the parsed document.

    Returns:
        The first non-empty match as a whitespace-trimmed string, or ``''``
        when nothing matches.

    The previous implementation converted the whole result list with
    ``str()`` and stripped ``" ['']"`` from the ends. That leaked repr
    artefacts into the value (``a', 'b`` for several matches, double quotes
    around text containing an apostrophe, escape sequences for control
    characters) and removed legitimate brackets or quotes from the text.
    """
    html = etree.fromstring(htmlcode, etree.HTMLParser())
    matches = html.xpath(xpath)
    if not isinstance(matches, list):
        # Expressions such as string(...) or count(...) yield a scalar.
        return str(matches).strip()
    for match in matches:
        text = str(match).strip()
        if text:
            return text
    return ''
|
|
||||||
|
|
||||||
# =====
|
|
||||||
|
|
||||||
# Path of the CSV file that create_csv() initialises and write_csv() appends to.
filename = '2.csv'
|
|
||||||
|
|
||||||
def create_csv():
    """Create (or truncate) the output file and write the CSV header line."""
    header = "名称,头像URL,个人URL,生日,年龄,身高,罩杯"
    with open(filename, 'w') as out:
        out.write(header + '\n')
|
|
||||||
|
|
||||||
|
|
||||||
def _field_value(text, label, suffix=''):
    """Return *text* without its leading *label* and optional trailing *suffix*."""
    value = text.strip()
    # Remove the label as a whole prefix. str.strip(label) would treat it as
    # a set of characters and could also eat matching characters of the value.
    if value.startswith(label):
        value = value[len(label):]
    if suffix and value.endswith(suffix):
        value = value[:-len(suffix)]
    return value.strip()


def write_csv(htmlcode, url):
    """Append one row extracted from *htmlcode* to the CSV file.

    Args:
        htmlcode: HTML source of the page the row is extracted from.
        url: Address of that page; written as the third column.

    The row has seven columns in the same order as the header written by
    ``create_csv``: name, photo URL, page URL, birthday, age, height
    (without the ``cm`` unit) and cup size.
    """
    # Build the complete row before touching the file, so an exception in
    # any extractor cannot leave a half-written line behind.
    row = [
        getName(htmlcode),
        getActorPhotoURL(htmlcode),
        url,
        _field_value(getBirthday(htmlcode), '生日:'),
        _field_value(getAge(htmlcode), '年齡:'),
        _field_value(getHigh(htmlcode), '身高:', suffix='cm'),
        _field_value(getCup(htmlcode), '罩杯:'),
    ]
    # Default encoding and '\n' line ending are kept on purpose so the rows
    # stay consistent with the header line that create_csv() writes.
    with open(filename, 'a') as f:
        # csv.writer quotes fields containing commas or quotes, which the
        # previous print()-based output did not.
        csv.writer(f, lineterminator='\n').writerow(row)
|
|
||||||
|
|
||||||
def main(url):
    """Process every page linked from the listing at *url*.

    The listing is fetched with ``get_html`` and its links extracted with
    ``getActorURL``. Each linked page is then fetched and appended to the CSV
    file through ``write_csv``, with a ``[done/total]`` progress line printed
    per page. A failure on one page is reported and the loop moves on.

    Args:
        url: Address of the listing page.
    """
    actor_list = getActorURL(get_html(url))
    total = len(actor_list)
    # enumerate() keeps the counter in step with the list position; the old
    # manual counter was bumped twice when the failure happened after the row
    # had already been written.
    for position, actor_url in enumerate(actor_list, 1):
        try:
            htmlcode = get_html(actor_url)
            write_csv(htmlcode, actor_url)
            print('[' + str(position) + '/' + str(total) + ']', 'writed', getName(htmlcode))
        except Exception as exc:
            # Exception (not a bare except) so Ctrl-C and SystemExit still
            # stop the run; include the URL and cause for diagnosis.
            print('error', actor_url, exc)
|
|
||||||
|
|
||||||
# Make sure the output file (with its header line) exists before appending.
if not os.path.exists(filename):
    print('create file')
    create_csv()

# Walk listing pages 198 through 202, printing a timestamp after each one.
for page in range(198, 203):
    print('page:', page)
    main('https://www.javbus.com/actresses/' + str(page))
    print(datetime.datetime.now().strftime("%Y.%m.%d-%H:%M:%S"))
|
|
Loading…
Reference in New Issue
Block a user