import re

import fuckit


class PathNameProcessor:
    """Static helpers that clean a media file name and read an episode marker.

    NOTE(review): the text this class was restored from had lost everything
    between the opening of ``pattern_of_resolution_alphas`` (inside
    ``remove_distractions``) and the closing lines of a later method that
    ended with ``return episode, name``. Only code that was actually visible
    is present below; the missing statements and method(s) must be recovered
    from version control.
    """

    # Trailing video-file extension. The dot is escaped so that only a literal
    # ".ext" suffix matches; left unescaped it matched any character, so a
    # name that merely ended in e.g. "ts" lost its last three characters.
    pattern_of_file_name_suffixes = r'\.(mov|mp4|avi|rmvb|wmv|mkv|flv|ts|m2ts)$'

    @staticmethod
    def remove_distractions(origin_name):
        """Replace known noise tokens in *origin_name* with ``'-'``.

        Steps performed, in order:

        1. strip a trailing video-file extension (case-insensitive);
        2. turn each of ``- _ ~ * #`` and space into ``'-'``;
        3. replace the tags ``Carib``/``Caribbean``, ``1pondo``,
           ``tokyo hot`` (optionally joined by ``-``, ``.`` or a space),
           ``Uncensored`` and ``JAV`` with ``'-'`` (case-insensitive);
        4. replace the literal ``'22-sht.me'`` with ``'-'``.

        NOTE(review): the original went on to define further patterns (a
        date pattern, still visible below, and a pattern for letter-led
        resolution tokens whose text is lost) and, presumably, to apply
        them. Those statements are missing from the text under review and
        are NOT restored here, so this function currently stops after
        step 4 -- restore the remainder from version control.

        Args:
            origin_name: file name (or path fragment) to clean.

        Returns:
            The cleaned string.
        """
        # Strip the file-type suffix.
        origin_name = re.sub(
            PathNameProcessor.pattern_of_file_name_suffixes,
            '',
            origin_name,
            flags=re.IGNORECASE,
        )

        # Unify separators so codes written like '/-070409_621' use '-' only.
        origin_name = re.sub(r'[-_~*# ]', "-", origin_name)

        # Site / release tags; order is kept as in the original.
        tag_patterns = (
            r'(Carib)(bean)?',
            r'(1pondo)',
            r'(tokyo)[-. ]?(hot)',
            r'Uncensored',
            r'JAV',
        )
        for tag_pattern in tag_patterns:
            origin_name = re.sub(tag_pattern, '-', origin_name, flags=re.IGNORECASE)

        # Remove a known distracting field.
        origin_name = origin_name.replace('22-sht.me', '-')

        # Date inside the file name: year 1970-2099, optional month and day,
        # preceded by '-' and followed by '-' or '.'.
        # NOTE(review): only the definition survived; the statement that
        # applied this pattern is part of the lost text.
        pattern_of_date = r'(?:-)(19[789]\d|20\d{2})(-?(0\d|1[012])-?(0[1-9]|[12]\d|3[01])?)?[-.]'  # noqa: F841

        # NOTE(review): `pattern_of_resolution_alphas` (letter-led resolution
        # tokens) started here; its text and every following statement of this
        # function are lost. The return below is assumed -- confirm.
        return origin_name

    @staticmethod
    def _normalize_hyphenated_code(name):
        """Drop leading junk and surplus zero padding from a hyphenated code.

        Recovered from the surviving tail of a method whose header and opening
        statements are lost; that method finished with
        ``return episode, name``. NOTE(review): re-attach this logic to the
        restored method and remove this helper.

        For names that do not contain ``heyzo`` (case-insensitive), the first
        occurrence of "two or more letters, '-', digits" is kept; leading
        zeros are dropped only as far as at least three digits remain.
        Examples from the original comment: ``107NTTR-037 -> NTTR-037``,
        ``SIVR-00008 -> SIVR-008``. Names without such a match are returned
        unchanged.
        """
        if "heyzo" not in name.lower():
            searched = re.search(r'([a-zA-Z]{2,})-(?:0*)(\d{3,})', name)
            if searched:
                name = '-'.join(searched.groups())
        return name

    @staticmethod
    def extract_episode_behind_code(origin_name, code):
        """Return the one-character episode marker that directly follows *code*.

        The marker may be separated from *code* by a single ``'-'`` and is
        either one letter not followed by another letter, or one digit not
        followed by another digit. Matching is case-insensitive.

        Args:
            origin_name: the string to search.
            code: the code after which the marker is expected; it is
                matched literally.

        Returns:
            The marker as a string, or ``None`` when nothing matches.
        """
        episode = None
        # `fuckit` is used as a context manager here; presumably it swallows
        # any exception raised inside the block (e.g. `.groupdict()` on a
        # failed search) so that `episode` stays None -- confirm against the
        # fuckit documentation.
        with fuckit:
            # Escape the code so regex metacharacters in it are taken
            # literally inside the look-behind.
            anchor = re.escape(code)
            # Zero-width look-behind: the marker must sit right after the code.
            # The group names `alpha` and `num` are the keys read below.
            result_dict = re.search(
                rf'(?<={anchor})-?((?P<alpha>([A-Z](?![A-Z])))|(?P<num>\d(?!\d)))',
                origin_name,
                re.I,
            ).groupdict()
            episode = result_dict['alpha'] or result_dict['num']
        return episode


def safe_list_get(list_in, idx, default):
    """Return ``list_in[idx]``, or *default* when that index is out of range.

    Only ``IndexError`` is handled; any other exception raised by the
    indexing operation propagates to the caller.
    """
    try:
        return list_in[idx]
    except IndexError:
        return default