File size: 1,552 Bytes
e729704
 
0f90cb6
 
 
e729704
661320e
e729704
0f90cb6
e729704
0f90cb6
 
e729704
ae504c8
 
 
e729704
0f90cb6
e729704
0f90cb6
e729704
 
 
 
0f90cb6
 
219e79a
 
 
7c479d2
219e79a
e729704
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import os
from huggingface_hub import InferenceClient
import gradio as gr
import re

# Hugging Face Hub repository id of the fine-tuned model queried by this app.
hugging_face_model_path = "cofeg/Finetuned-Xunzi-Qwen2-1.5B-for-ancient-text-generation"

# Shared inference client. The access token is read from the
# HUGGING_FACE_TOKEN environment variable (None when the variable is unset).
client = InferenceClient(
    token=os.environ.get('HUGGING_FACE_TOKEN'),
    model=hugging_face_model_path,
)

# Sentence terminators: the ideographic full stop plus both the ASCII and the
# full-width forms of "!" and "?" (U+FF01, U+FF1F).
_SENTENCE_END = "。!?\uFF01\uFF1F"

# A sentence is either text followed by a run of terminators, or a trailing
# fragment that has no terminator at all (so the tail of the input is kept).
_SENTENCE_RE = re.compile(
    f"[^{_SENTENCE_END}]*[{_SENTENCE_END}]+|[^{_SENTENCE_END}]+"
)

# Marker emitted by the model at the end of a generation; never shown to users.
_END_OF_TEXT = "<|endoftext|>"


def _split_sentences(text):
    """Return the non-blank sentences of *text*, in their original order."""
    return [part for part in _SENTENCE_RE.findall(text) if part.strip()]


def split_and_generate(modern_text):
    """Stream the model's output for *modern_text*, one sentence at a time.

    The input is split into sentences because the model is trained on
    sentence pairs. Each sentence is wrapped in the prompt template and sent
    to the module-level ``client``; generated tokens are appended to a single
    running string that covers all sentences processed so far.

    Args:
        modern_text: The text to convert. ``None`` is treated as empty.

    Yields:
        The accumulated output after each received token. For empty or
        whitespace-only input a single empty string is yielded and the model
        is not called.
    """
    sentences = _split_sentences(modern_text or "")

    # Nothing to translate: emit an empty result instead of prompting the
    # model with a blank sentence.
    if not sentences:
        yield ""
        return

    responses = ""
    for sentence in sentences:
        prompt = "现代文:" + sentence + " 古文:"
        for token in client.text_generation(prompt, max_new_tokens=128, stream=True):
            if token == _END_OF_TEXT:
                continue
            responses += token
            yield responses

# Help text shown under the title; it is rendered as Markdown/HTML by Gradio,
# hence the <br> line breaks and the inline link.
_DESCRIPTION = (
    "请在左边对话框输入你要转换的现代文并点击“Submit”按钮,右边的对话框将显示转换后的古文。首次使用较慢,后面很快。"
    "<br>一个句子不要太长,如果文本很长,可多分几个句子,模型会逐句转化。"
    "<br>详情请访问本项目[GitHub主页](https://github.com/JianXiao2021/ancient_text_generation_LLM)。"
)

# One text box in, one text box out, wired to the streaming generator above.
demo = gr.Interface(
    fn=split_and_generate,
    inputs=[gr.Textbox(label="现代文", lines=10)],
    outputs=[gr.Textbox(label="古文", lines=10)],
    title="现代文转古文大模型",
    description=_DESCRIPTION,
)

# Start the web server as soon as this module is executed.
demo.launch()