Delete girl.py
This commit is contained in:
		
							parent
							
								
									a8255adbb8
								
							
						
					
					
						commit
						11a9ab6b51
					
				
							
								
								
									
										90
									
								
								girl.py
									
									
									
									
									
								
							
							
						
						
									
										90
									
								
								girl.py
									
									
									
									
									
								
							| @ -1,90 +0,0 @@ | ||||
| # coding=utf-8 | ||||
| import csv | ||||
| import re | ||||
| from lxml import etree | ||||
| import json | ||||
| from bs4 import BeautifulSoup | ||||
| from ADC_function import * | ||||
| import os | ||||
| import datetime | ||||
| 
 | ||||
| def getActorURL(htmlcode): | ||||
|     html = etree.fromstring(htmlcode, etree.HTMLParser())  # //table/tr[1]/td[1]/text() | ||||
|     result1 = html.xpath('//*[@id="waterfall"]/div/a/@href') | ||||
|     return result1 | ||||
| # ===== | ||||
| def getName(htmlcode): | ||||
|     html = etree.fromstring(htmlcode, etree.HTMLParser()) | ||||
|     result1 = str(html.xpath('//*[@id="waterfall"]/div[1]/div/div[2]/span/text()')).strip(" ['']") | ||||
|     return result1 | ||||
| def getActorPhotoURL(htmlcode): | ||||
|     html = etree.fromstring(htmlcode, etree.HTMLParser()) | ||||
|     result1 = str(html.xpath('//*[@id="waterfall"]/div[1]/div/div[1]/img/@src')).strip(" ['']") | ||||
|     return result1 | ||||
| def getBirthday(htmlcode): | ||||
|     html = etree.fromstring(htmlcode, etree.HTMLParser()) | ||||
|     result1 = str(html.xpath('//p[contains(text(),"生日: ")]/text()')).strip(" ['']") | ||||
|     return result1 | ||||
| def getAge(htmlcode): | ||||
|     html = etree.fromstring(htmlcode, etree.HTMLParser()) | ||||
|     result1 = str(html.xpath('//p[contains(text(),"年齡: ")]/text()')).strip(" ['']") | ||||
|     return result1 | ||||
| def getHigh(htmlcode): | ||||
|     html = etree.fromstring(htmlcode, etree.HTMLParser()) | ||||
|     result1 = str(html.xpath('//p[contains(text(),"身高: ")]/text()')).strip(" ['']") | ||||
|     return result1 | ||||
| def getCup(htmlcode): | ||||
|     html = etree.fromstring(htmlcode, etree.HTMLParser()) | ||||
|     result1 = str(html.xpath('//p[contains(text(),"罩杯: ")]/text()')).strip(" ['']") | ||||
|     return result1 | ||||
| def getInfo(htmlcode,xpath): | ||||
|     html = etree.fromstring(htmlcode, etree.HTMLParser()) | ||||
|     result1 = str(html.xpath(xpath)).strip(" ['']") | ||||
|     return result1 | ||||
| 
 | ||||
| # ===== | ||||
| 
 | ||||
| filename = '2.csv' | ||||
| 
 | ||||
| def create_csv(): | ||||
|     path = filename | ||||
|     with open(path, 'w') as f: | ||||
|         print("名称,头像URL,个人URL,生日,年龄,身高,罩杯", file=f, ) | ||||
| 
 | ||||
| 
 | ||||
| def write_csv(htmlcode, url): | ||||
|     path = filename | ||||
|     with open(path, 'a+') as f: | ||||
|         print(getName(htmlcode), end=',', file=f) | ||||
|         print(getActorPhotoURL(htmlcode), end=',', file=f) | ||||
|         print(url, end=',', file=f) | ||||
|         print(getBirthday(htmlcode).strip('生日: '), end=',', file=f) | ||||
|         print(getAge(htmlcode).strip('年齡: '), end=',', file=f) | ||||
|         print(getHigh(htmlcode).strip('身高: ').strip('cm'), end=',', file=f) | ||||
|         print(getCup(htmlcode).strip('罩杯: '), file=f) | ||||
| 
 | ||||
| def main(url): | ||||
|     actor_list = getActorURL(get_html(url)) | ||||
|     b = 0 | ||||
|     c = len(actor_list) | ||||
|     for i in actor_list: | ||||
|         try: | ||||
|             htmlcode = get_html(i) | ||||
|             write_csv(htmlcode, i) | ||||
|             b = b + 1 | ||||
|             print('[' + str(b) + '/' + str(c) + ']', 'writed', getName(htmlcode)) | ||||
|         except: | ||||
|             print('error') | ||||
|             b = b + 1 | ||||
|             continue | ||||
| 
 | ||||
| if os.path.exists(filename) == False: | ||||
|     print('create file') | ||||
|     create_csv() | ||||
| 
 | ||||
| a = 198 | ||||
| while a <= 202: | ||||
|     print('page:', a) | ||||
|     main('https://www.javbus.com/actresses/' + str(a)) | ||||
|     print(datetime.datetime.now().strftime("%Y.%m.%d-%H:%M:%S")) | ||||
|     a = a + 1 | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user