Spaces:

fistyyy
/

Music_LMMs

Running

Music_LMMs / music_search.py

fistyee

update

d0c2b7c 3 months ago

2.84 kB

	import re
	from openpyxl import load_workbook
	import difflib
	import os
	# Root directory that read_music_file() searches: it looks for
	# <base_dir>/<music_id>/music.abc.
	base_dir = './abcfile/'

	def read_music_file(music_id):
	    """
	    Return the contents of the ``music.abc`` file stored for ``music_id``.

	    The file is looked up at ``<base_dir>/<music_id>/music.abc``, where
	    ``base_dir`` is the module-level root directory.

	    :param music_id: identifier naming the sub-folder of ``base_dir``; it is
	        converted with ``str()`` first, so numeric ids (for example integers
	        read from a spreadsheet cell) are accepted as well as strings
	    :return: the text of ``music.abc`` on success; otherwise a human-readable
	        error message string saying that the folder or the file does not
	        exist, or that reading the file failed
	    """
	    # os.path.join() raises TypeError for non-string components, so
	    # normalise the id before building the folder path.
	    folder_path = os.path.join(base_dir, str(music_id))
	    if not os.path.isdir(folder_path):
	        return f"文件夹 '{folder_path}' 不存在"

	    file_path = os.path.join(folder_path, 'music.abc')
	    if not os.path.isfile(file_path):
	        return f"文件 '{file_path}' 不存在"

	    # The file can still vanish or be unreadable/undecodable after the checks
	    # above; report that as a message, like the other failure cases.
	    try:
	        with open(file_path, 'r', encoding='utf-8') as file:
	            return file.read()
	    except (OSError, UnicodeError) as e:
	        return f"读取文件时出错: {e}"

	def read_excel():
	    """
	    Load the title -> music_id lookup table from ``music_id.xlsx``.

	    Reads the ``Results`` worksheet, treating the first row as a header. In
	    every following row the first column is the title and the second column
	    is the music_id; rows in which either value is empty/falsy are ignored.
	    If a title occurs more than once, the last row wins.

	    :return: dict mapping each title to its music_id
	    """
	    sheet = load_workbook('music_id.xlsx')['Results']
	    # min_row=2 starts after the header row; values_only yields plain cell
	    # values instead of cell objects.
	    data_rows = sheet.iter_rows(min_row=2, values_only=True)
	    return {
	        cells[0]: cells[1]
	        for cells in data_rows
	        if cells[1] and cells[0]
	    }

	def preprocess(s):
	    """
	    Strip digits and full-width-parenthesised text from a string.

	    :param s: the string to clean (for example a song title)
	    :return: ``s`` with every run of digits and every ``（...）`` group
	        removed, and with leading/trailing whitespace stripped
	    """
	    # The "|" has to be unescaped to act as alternation. Escaped, the pattern
	    # only matched digits followed by a literal "|" and a bracket group, so
	    # ordinary titles were returned unchanged.
	    return re.sub(r'\d+|（.*?）', '', s).strip()

	def is_search(text):
	    """
	    Fuzzy-match ``text`` against the known titles and return the matching score.

	    The whole of ``text`` is used as the query. It is compared with the titles
	    returned by ``read_excel()`` using ``difflib.get_close_matches`` (best
	    single match, similarity cutoff 0.3). When a title matches, the matched
	    title list is printed and the result of ``read_music_file()`` for the
	    corresponding music_id is returned.

	    :param text: the query string
	    :return: whatever ``read_music_file()`` returns for the best-matching
	        title, or ``False`` when ``text`` is not a string or no title is
	        similar enough
	    """
	    # difflib compares character sequences; a non-string query cannot match
	    # and would otherwise raise inside get_close_matches.
	    if not isinstance(text, str):
	        return False

	    # Spreadsheet cells can hold non-string values (e.g. numbers); use their
	    # string form so every title is comparable with the query.
	    titles = {str(title): music_id for title, music_id in read_excel().items()}

	    title_matches = difflib.get_close_matches(text, titles, n=1, cutoff=0.3)
	    if not title_matches:
	        return False

	    print(title_matches)
	    # The match always comes from ``titles``, so a direct lookup is safe.
	    return read_music_file(titles[title_matches[0]])

	#text = "四分音符是什么"

	#print(is_search(text))