Spaces:

hHoai
/

Vietnamese_correction

Sleeping

App Files Files Community

Vietnamese_correction / app.py

hHoai

Update app.py

e15bbee verified 11 days ago

raw

history blame contribute delete

2.26 kB

	import difflib

	import streamlit as st
	from transformers import pipeline

	# Value handed to the pipeline as ``max_length`` when generating a correction.
	MAX_LENGTH = 512


	@st.cache_resource
	def _load_corrector():
	    """Build the text2text-generation pipeline used for spell correction.

	    Streamlit re-executes the whole script on every user interaction, so the
	    loader is wrapped in ``st.cache_resource`` to construct the pipeline once
	    and reuse the same object across reruns instead of rebuilding it on each
	    button click.
	    """
	    return pipeline("text2text-generation", model="Diezu/bat_pho_bo")


	# Load the spell corrector model (cached across Streamlit reruns).
	corrector = _load_corrector()

	def _changed_token_indices(orig_tokens, corr_tokens):
	    """Return the set of indices in ``orig_tokens`` that the correction changed."""
	    if len(orig_tokens) == len(corr_tokens):
	        # Same word count: compare word by word, position by position.
	        return {
	            index
	            for index, (orig_word, corr_word) in enumerate(zip(orig_tokens, corr_tokens))
	            if orig_word != corr_word
	        }

	    # Different word counts: a positional comparison would shift every word
	    # after the first insertion/deletion and flag all of them, so align the
	    # two token lists first and keep only the original-side changes.
	    matcher = difflib.SequenceMatcher(None, orig_tokens, corr_tokens, autojunk=False)
	    changed = set()
	    for tag, start, stop, _, _ in matcher.get_opcodes():
	        if tag in ("replace", "delete"):
	            changed.update(range(start, stop))
	    return changed


	def find_mistake_positions(original, corrected):
	    """Find the character spans of the words in ``original`` that were corrected.

	    Both sentences are split on whitespace. When they contain the same number
	    of words, words are compared position by position. When the counts differ,
	    the word lists are aligned with ``difflib.SequenceMatcher`` and every
	    original word that was replaced or removed is reported; words that exist
	    only in ``corrected`` have no span in ``original`` and are not reported.

	    Args:
	        original: The source sentence (str).
	        corrected: The corrected sentence (str).

	    Returns:
	        A list of ``(start_index, end_index)`` tuples, one per changed word,
	        in order of appearance. Indices are 1-based character positions in
	        ``original`` and ``end_index`` is inclusive.
	    """
	    orig_tokens = original.split()
	    corr_tokens = corrected.split()
	    changed = _changed_token_indices(orig_tokens, corr_tokens)

	    positions = []
	    cursor = 0  # character offset in ``original`` just past the previous word
	    for index, word in enumerate(orig_tokens):
	        # Only whitespace lies between ``cursor`` and the next word, so the
	        # first match is exactly that word, whatever the spacing.
	        found = original.find(word, cursor)
	        # Advance from the real match end rather than assuming one space
	        # between words; leading or repeated whitespace must not desync it.
	        cursor = found + len(word)
	        if index in changed:
	            positions.append((found + 1, found + len(word)))

	    return positions

	# Streamlit app layout: title, short instructions, input box and action button.
	st.title("Ứng Dụng Sửa Lỗi Chính Tả")
	st.write("Nhập văn bản và nhận kết quả sửa lỗi chính tả!")

	# Input text box for the user to enter a sentence
	input_text = st.text_area("Nhập câu gốc:", placeholder="Hôm nay toi di hoc rất vui, gặp gỡ nhiêu ban be mơi.")

	if st.button("Sửa lỗi"):
	    if not input_text.strip():
	        # Nothing to correct: ask for input instead of running the model
	        # on an empty or whitespace-only string.
	        st.warning("Vui lòng nhập văn bản trước khi sửa lỗi.")
	    else:
	        # The pipeline is called with a one-element batch, so the single
	        # result is read back from index 0.
	        predictions = corrector([input_text], max_length=MAX_LENGTH)

	        # Get the corrected text and locate the changed words in the input
	        corrected_text = predictions[0]["generated_text"]
	        mistake_positions = find_mistake_positions(input_text, corrected_text)

	        # Display the original and corrected sentences
	        st.subheader("Kết quả sửa lỗi:")
	        st.write(f"Câu gốc: {input_text}")
	        st.write(f"Câu sửa: {corrected_text}")

	        # Display one line per changed word, or a notice when nothing changed
	        st.subheader("Vị trí các từ sai (start_index, end_index):")
	        if mistake_positions:
	            for start, end in mistake_positions:
	                st.write(f"Vị trí từ sai: {start} - {end}")
	        else:
	            st.write("Không phát hiện từ sai.")