Spaces:
Running
Running
import re | |
from openpyxl import load_workbook | |
import difflib | |
import os | |
# Root directory that holds one sub-folder per music_id; each sub-folder is
# expected to contain a ``music.abc`` file.
base_dir = './abcfile/'
def read_music_file(music_id):
    """Return the text of ``music.abc`` from the folder named after *music_id*.

    The folder is looked up directly under the module-level ``base_dir``.

    :param music_id: Identifier that names the folder. Non-string values
        (for example an integer read from a spreadsheet cell) are converted
        with ``str()`` before the path is built.
    :return: The file content on success. On failure a human-readable
        message is returned instead (missing folder, missing file, or read
        error); problems are reported through the return value, not raised.
    """
    # os.path.join() only accepts str/bytes/PathLike, so normalise the id
    # first; otherwise a numeric id would raise TypeError here.
    folder_path = os.path.join(base_dir, str(music_id))
    if not os.path.isdir(folder_path):
        return f"文件夹 '{folder_path}' 不存在"

    file_path = os.path.join(folder_path, 'music.abc')
    if not os.path.isfile(file_path):
        return f"文件 '{file_path}' 不存在"

    # Best-effort read: any failure is turned into a message for the caller.
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            return file.read()
    except Exception as e:
        return f"读取文件时出错: {e}"
def read_excel():
    """Build a title -> music_id lookup from the spreadsheet ``music_id.xlsx``.

    Reads the ``Results`` worksheet, skipping the first row as a header.
    In every remaining row the first column is taken as the title and the
    second column as the music_id. Rows where either value is empty or
    otherwise falsy are ignored; when a title occurs more than once the
    last row wins.

    :return: dict mapping each title to its music_id.
    """
    sheet = load_workbook('music_id.xlsx')['Results']
    # min_row=2 skips the header row; values_only yields plain cell values.
    data_rows = sheet.iter_rows(min_row=2, values_only=True)
    return {row[0]: row[1] for row in data_rows if row[1] and row[0]}
def preprocess(s):
    """Strip digits and parenthesised text from *s*, then trim whitespace.

    Each run of digits is removed, as is each shortest ``(...)`` span on a
    single line. Both ASCII parentheses and full-width ``（...）`` are
    recognised. An opening parenthesis without a closing one is left as is.

    :param s: Text to clean.
    :return: The cleaned text with leading/trailing whitespace removed.
    """
    # The parentheses must be escaped: an unescaped ``(.*?)`` is a capture
    # group that matches the empty string and therefore removes nothing.
    return re.sub(r'\d+|\(.*?\)|（.*?）', '', s).strip()
def is_search(text):
    """Fuzzy-match *text* against the known titles and load the matching score.

    The title table is loaded with ``read_excel()`` on every call. The single
    closest title (similarity cutoff 0.3) is selected with
    ``difflib.get_close_matches`` and its music_id is handed to
    ``read_music_file()``.

    :param text: Query string to compare against the titles.
    :return: Whatever ``read_music_file()`` returns for the best match, or
        ``False`` when no title is similar enough.
    """
    music_dict = read_excel()

    # Spreadsheet cells can hold numbers or dates, but difflib can only
    # compare sequences such as strings; key the candidates by str(title) so
    # one non-text title cannot make every lookup raise TypeError.
    titles = {str(title): music_id for title, music_id in music_dict.items()}

    title_matches = difflib.get_close_matches(text, titles, n=1, cutoff=0.3)
    if not title_matches:
        return False

    # Diagnostic output of the chosen title (kept from the original).
    print(title_matches)

    # The match always comes from ``titles``, so a direct lookup is safe.
    return read_music_file(titles[title_matches[0]])
#text = "四分音符是什么" | |
#print(is_search(text)) |