Beta 10.6更新
This commit is contained in:
		
							parent
							
								
									7c16307643
								
							
						
					
					
						commit
						575a710ef8
					
				| @ -6,7 +6,7 @@ import sys | |||||||
| from ADC_function import * | from ADC_function import * | ||||||
| import json | import json | ||||||
| 
 | 
 | ||||||
| version='0.10.5' | version='0.10.6' | ||||||
| 
 | 
 | ||||||
| def UpdateCheck(): | def UpdateCheck(): | ||||||
|     html2 = get_html('https://raw.githubusercontent.com/wenead99/AV_Data_Capture/master/update_check.json') |     html2 = get_html('https://raw.githubusercontent.com/wenead99/AV_Data_Capture/master/update_check.json') | ||||||
|  | |||||||
							
								
								
									
										27
									
								
								core.py
									
									
									
									
									
								
							
							
						
						
									
										27
									
								
								core.py
									
									
									
									
									
								
							| @ -55,6 +55,7 @@ def getNumberFromFilename(filepath): | |||||||
|     global cover |     global cover | ||||||
|     global imagecut |     global imagecut | ||||||
|     global tag |     global tag | ||||||
|  |     global image_main | ||||||
| 
 | 
 | ||||||
|     global naming_rule |     global naming_rule | ||||||
|     global location_rule |     global location_rule | ||||||
| @ -122,19 +123,19 @@ def getNumberFromFilename(filepath): | |||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|         title     = json_data['title'] |         title      = json_data['title'] | ||||||
|         studio    = json_data['studio'] |         studio     = json_data['studio'] | ||||||
|         year      = json_data['year'] |         year       = json_data['year'] | ||||||
|         outline   = json_data['outline'] |         outline    = json_data['outline'] | ||||||
|         runtime   = json_data['runtime'] |         runtime    = json_data['runtime'] | ||||||
|         director  = json_data['director'] |         director   = json_data['director'] | ||||||
|         actor_list= str(json_data['actor']).strip("[ ]").replace("'",'').replace(" ",'').split(',') #字符串转列表 |         actor_list = str(json_data['actor']).strip("[ ]").replace("'",'').replace(" ",'').split(',') #字符串转列表 | ||||||
|         release   = json_data['release'] |         release    = json_data['release'] | ||||||
|         number    = json_data['number'] |         number     = json_data['number'] | ||||||
|         cover     = json_data['cover'] |         cover      = json_data['cover'] | ||||||
|         imagecut  = json_data['imagecut'] |         imagecut   = json_data['imagecut'] | ||||||
|         tag       = str(json_data['tag']).strip("[ ]").replace("'",'').replace(" ",'').split(',')   #字符串转列表 |         tag        = str(json_data['tag']).strip("[ ]").replace("'",'').replace(" ",'').split(',')   #字符串转列表 | ||||||
|         actor = str(actor_list).strip("[ ]").replace("'",'').replace(" ",'') |         actor      = str(actor_list).strip("[ ]").replace("'",'').replace(" ",'') | ||||||
| 
 | 
 | ||||||
|         #====================处理异常字符====================== #\/:*?"<>| |         #====================处理异常字符====================== #\/:*?"<>| | ||||||
|         #if "\\" in title or "/" in title or ":" in title or "*" in title or "?" in title or '"' in title or '<' in title or ">" in title or "|" in title or len(title) > 200: |         #if "\\" in title or "/" in title or ":" in title or "*" in title or "?" in title or '"' in title or '<' in title or ">" in title or "|" in title or len(title) > 200: | ||||||
|  | |||||||
							
								
								
									
										101
									
								
								siro.py
									
									
									
									
									
								
							
							
						
						
									
										101
									
								
								siro.py
									
									
									
									
									
								
							| @ -8,81 +8,92 @@ from ADC_function import * | |||||||
| def getTitle(a): | def getTitle(a): | ||||||
|     html = etree.fromstring(a, etree.HTMLParser()) |     html = etree.fromstring(a, etree.HTMLParser()) | ||||||
|     result = str(html.xpath('//*[@id="center_column"]/div[2]/h1/text()')).strip(" ['']") |     result = str(html.xpath('//*[@id="center_column"]/div[2]/h1/text()')).strip(" ['']") | ||||||
|     return result |     return result.replace('/',',') | ||||||
| def getActor(a): #//*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text() | def getActor(a): #//*[@id="center_column"]/div[2]/div[1]/div/table/tbody/tr[1]/td/text() | ||||||
|     html = etree.fromstring(a, etree.HTMLParser()) #//table/tr[1]/td[1]/text() |     html = etree.fromstring(a, etree.HTMLParser()) #//table/tr[1]/td[1]/text() | ||||||
|     result2=str(html.xpath('//table/tr[1]/td[1]/text()')).strip(" ['\\n                                        ']") |     result1=str(html.xpath('//th[contains(text(),"出演:")]/../td/a/text()')).strip(" ['']").strip('\\n    ').strip('\\n') | ||||||
|     result1 = str(html.xpath('//table/tr[1]/td[1]/a/text()')).strip(" ['\\n                                        ']") |     result2=str(html.xpath('//th[contains(text(),"出演:")]/../td/text()')).strip(" ['']").strip('\\n    ').strip('\\n') | ||||||
|     return str(result1+result2).strip('+') |     return str(result1+result2).strip('+').replace("', '",'').replace('"','').replace('/',',') | ||||||
| def getStudio(a): | def getStudio(a): | ||||||
|     html = etree.fromstring(a, etree.HTMLParser()) |     html = etree.fromstring(a, etree.HTMLParser()) #//table/tr[1]/td[1]/text() | ||||||
|     result2=str(html.xpath('//table[2]/tr[2]/td/text()')).strip(" ['\\n                                        ']") |     result1=str(html.xpath('//th[contains(text(),"シリーズ:")]/../td/a/text()')).strip(" ['']").strip('\\n    ').strip('\\n') | ||||||
|     result1 = str(html.xpath('//table/tr[2]/td[1]/a/text()')).strip(" ['\\n                                        ']") |     result2=str(html.xpath('//th[contains(text(),"シリーズ:")]/../td/text()')).strip(" ['']").strip('\\n    ').strip('\\n') | ||||||
|     return str(result1+result2).strip('+') |     return str(result1+result2).strip('+').replace("', '",'').replace('"','') | ||||||
| def getRuntime(a): | def getRuntime(a): | ||||||
|     html = etree.fromstring(a, etree.HTMLParser()) |     html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text() | ||||||
|     result2=str(html.xpath('//table/tr[3]/td[1]/text()')).strip(" ['\\n                                        ']") |     result1 = str(html.xpath('//th[contains(text(),"収録時間:")]/../td/a/text()')).strip(" ['']").strip('\\n    ').strip('\\n') | ||||||
|     result1 = str(html.xpath('//table/tr[3]/td[1]/a/text()')).strip(" ['\\n                                        ']") |     result2 = str(html.xpath('//th[contains(text(),"収録時間:")]/../td/text()')).strip(" ['']").strip('\\n    ').strip('\\n') | ||||||
|     return str(result1 + result2).strip('+').strip('mi') |     return str(result1 + result2).strip('+').rstrip('mi') | ||||||
| def getLabel(a): | def getLabel(a): | ||||||
|     html = etree.fromstring(a, etree.HTMLParser()) |     html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text() | ||||||
|     result2=str(html.xpath('//table/tr[6]/td[1]/text()')).strip(" ['\\n                                        ']") |     result1 = str(html.xpath('//th[contains(text(),"シリーズ:")]/../td/a/text()')).strip(" ['']").strip('\\n    ').strip( | ||||||
|     result1 = str(html.xpath('//table/tr[6]/td[1]/a/text()')).strip(" ['\\n                                        ']") |         '\\n') | ||||||
|     return str(result1 + result2).strip('+') |     result2 = str(html.xpath('//th[contains(text(),"シリーズ:")]/../td/text()')).strip(" ['']").strip('\\n    ').strip( | ||||||
|  |         '\\n') | ||||||
|  |     return str(result1 + result2).strip('+').replace("', '",'').replace('"','') | ||||||
| def getNum(a): | def getNum(a): | ||||||
|     html = etree.fromstring(a, etree.HTMLParser()) |     html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text() | ||||||
|     result2=str(html.xpath('//table/tr[2]/td[4]/a/text()')).strip(" ['\\n                                        ']") |     result1 = str(html.xpath('//th[contains(text(),"品番:")]/../td/a/text()')).strip(" ['']").strip('\\n    ').strip( | ||||||
|     result1 = str(html.xpath('//table/tr[2]/td[4]/text()')).strip(" ['\\n                                        ']") |         '\\n') | ||||||
|  |     result2 = str(html.xpath('//th[contains(text(),"品番:")]/../td/text()')).strip(" ['']").strip('\\n    ').strip( | ||||||
|  |         '\\n') | ||||||
|     return str(result1 + result2).strip('+') |     return str(result1 + result2).strip('+') | ||||||
| def getYear(a): | def getYear(getRelease): | ||||||
|     html = etree.fromstring(a, etree.HTMLParser()) |     try: | ||||||
|     result2=str(html.xpath('//table/tr[2]/td[5]/a/text()')).strip(" ['\\n                                        ']") |         result = str(re.search('\d{4}',getRelease).group()) | ||||||
|     result1=str(html.xpath('//table/tr[2]/td[5]/text()')).strip(" ['\\n                                        ']") |         return result | ||||||
|     return result2+result1 |     except: | ||||||
|  |         return getRelease | ||||||
| def getRelease(a): | def getRelease(a): | ||||||
|     html = etree.fromstring(a, etree.HTMLParser()) |     html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text() | ||||||
|     result2=str(html.xpath('//table/tr[5]/td[1]/text()')).strip(" ['\\n                                        ']") |     result1 = str(html.xpath('//th[contains(text(),"配信開始日:")]/../td/a/text()')).strip(" ['']").strip('\\n    ').strip( | ||||||
|     result1 = str(html.xpath('//table/tr[5]/a/td[1]/text()')).strip(" ['\\n                                        ']") |         '\\n') | ||||||
|  |     result2 = str(html.xpath('//th[contains(text(),"配信開始日:")]/../td/text()')).strip(" ['']").strip('\\n    ').strip( | ||||||
|  |         '\\n') | ||||||
|     return str(result1 + result2).strip('+') |     return str(result1 + result2).strip('+') | ||||||
| def getTag(a): | def getTag(a): | ||||||
|     html = etree.fromstring(a, etree.HTMLParser()) |     html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text() | ||||||
|     result2=str(html.xpath('//table/tr[8]/td[1]/a/text()')).strip(" ['\\n                                        ']") |     result1 = str(html.xpath('//th[contains(text(),"ジャンル:")]/../td/a/text()')).strip(" ['']").strip('\\n    ').strip( | ||||||
|     result1=str(html.xpath('//table/tr[8]/td[1]/text()')).strip(" ['\\n                                        ']") |         '\\n') | ||||||
|     return str(result1 + result2).strip('+') |     result2 = str(html.xpath('//th[contains(text(),"ジャンル:")]/../td/text()')).strip(" ['']").strip('\\n    ').strip( | ||||||
|  |         '\\n') | ||||||
|  |     return str(result1 + result2).strip('+').replace("', '\\n",",").replace("', '","").replace('"','') | ||||||
| def getCover(htmlcode): | def getCover(htmlcode): | ||||||
|     html = etree.fromstring(htmlcode, etree.HTMLParser()) |     html = etree.fromstring(htmlcode, etree.HTMLParser()) | ||||||
|     result = str(html.xpath('//*[@id="center_column"]/div[2]/div[1]/div/div/h2/img/@src')).strip(" ['']") |     result = str(html.xpath('//*[@id="center_column"]/div[2]/div[1]/div/div/h2/img/@src')).strip(" ['']") | ||||||
|     return result |     return result | ||||||
| def getDirector(a): | def getDirector(a): | ||||||
|     html = etree.fromstring(a, etree.HTMLParser()) |     html = etree.fromstring(a, etree.HTMLParser())  # //table/tr[1]/td[1]/text() | ||||||
|     result1 = str(html.xpath('//table/tr[2]/td[1]/text()')).strip(" ['\\n                                        ']") |     result1 = str(html.xpath('//th[contains(text(),"シリーズ")]/../td/a/text()')).strip(" ['']").strip('\\n    ').strip( | ||||||
|     result2 = str(html.xpath('//table/tr[2]/td[1]/a/text()')).strip(" ['\\n                                        ']") |         '\\n') | ||||||
|     return str(result1 + result2).strip('+') |     result2 = str(html.xpath('//th[contains(text(),"シリーズ")]/../td/text()')).strip(" ['']").strip('\\n    ').strip( | ||||||
|  |         '\\n') | ||||||
|  |     return str(result1 + result2).strip('+').replace("', '",'').replace('"','') | ||||||
| def getOutline(htmlcode): | def getOutline(htmlcode): | ||||||
|     html = etree.fromstring(htmlcode, etree.HTMLParser()) |     html = etree.fromstring(htmlcode, etree.HTMLParser()) | ||||||
|     result = str(html.xpath('//*[@id="introduction"]/dd/p[1]/text()')).strip(" ['']") |     result = str(html.xpath('//*[@id="introduction"]/dd/p[1]/text()')).strip(" ['']") | ||||||
|     return result |     return result | ||||||
| def main(number): | def main(number2): | ||||||
|     htmlcode=get_html('https://www.mgstage.com/product/product_detail/'+str(number),cookies={'adc':'1'}) |     number=number2.upper() | ||||||
|  |     htmlcode=get_html('https://www.mgstage.com/product/product_detail/'+str(number)+'/',cookies={'adc':'1'}) | ||||||
|     soup = BeautifulSoup(htmlcode, 'lxml') |     soup = BeautifulSoup(htmlcode, 'lxml') | ||||||
|     a = str(soup.find(attrs={'class': 'detail_data'})).replace('\n                                        ','') |     a = str(soup.find(attrs={'class': 'detail_data'})).replace('\n                                        ','').replace('                                ','').replace('\n                            ','').replace('\n                        ','') | ||||||
|     #print(a) |  | ||||||
|     dic = { |     dic = { | ||||||
|         'title': getTitle(htmlcode).replace("\\n",'').replace('        ',''), |         'title': getTitle(htmlcode).replace("\\n",'').replace('        ',''), | ||||||
|         'studio': getStudio(a), |         'studio': getStudio(a), | ||||||
|         'year': str(re.search('\d{4}',getRelease(a)).group()), |  | ||||||
|         'outline': getOutline(htmlcode), |         'outline': getOutline(htmlcode), | ||||||
|         'runtime': getRuntime(a), |         'runtime': getRuntime(a), | ||||||
|         'director': getDirector(a), |         'director': getDirector(a), | ||||||
|         'actor': getActor(a), |         'actor': getActor(a), | ||||||
|         'release': getRelease(a), |         'release': getRelease(a), | ||||||
|         'number': number, |         'number': getNum(a), | ||||||
|         'cover': getCover(htmlcode), |         'cover': getCover(htmlcode), | ||||||
|         'imagecut': 0, |         'imagecut': 0, | ||||||
|         'tag': getTag(a).replace("'\\n',",'').replace(' ', '').replace("\\n','\\n",','), |         'tag': getTag(a), | ||||||
|         'label':getLabel(a) |         'label':getLabel(a), | ||||||
|  |         'year': getYear(getRelease(a)),  # str(re.search('\d{4}',getRelease(a)).group()), | ||||||
|     } |     } | ||||||
|     js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'),)#.encode('UTF-8') |     js = json.dumps(dic, ensure_ascii=False, sort_keys=True, indent=4, separators=(',', ':'),)#.encode('UTF-8') | ||||||
|     #print('https://www.mgstage.com/product/product_detail/'+str(number)) |  | ||||||
|     return js |     return js | ||||||
| #print(main('SIRO-3552')) | 
 | ||||||
|  | #print(main('200GANA-1624')) | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user