init

- .gitignore +1 -0
- LLM.py +420 -0
- app copy.py +96 -0
- app.py +25 -68
- common/util.py +31 -0
- database/__init__.py +11 -0
- database/constant.py +2 -0
- database/database.py +10 -0
- database/model.py +37 -0
- database/operation.py +704 -0
- database/schema.py +209 -0
- memorize.py +264 -0
- output/logs/.gitkeep +0 -0
- story_agent.py +69 -0
- web.py +696 -0
.gitignore
ADDED
@@ -0,0 +1 @@
+**/*.pyc
LLM.py
ADDED
@@ -0,0 +1,420 @@
# encoding=utf-8
import multiprocessing as mp
import warnings

import requests
import tiktoken
from tqdm import tqdm
from dataclasses import dataclass, field
from typing import (
    AbstractSet,
    Any,
    Callable,
    Collection,
    Dict,
    Generator,
    List,
    Literal,
    Mapping,
    Optional,
    Set,
    Tuple,
    Union,
)
from pydantic import Extra, Field, root_validator
from loguru import logger

from langchain.llms.base import BaseLLM
from langchain.schema import Generation, LLMResult
from langchain.utils import get_from_dict_or_env

from langchain.callbacks.manager import (
    AsyncCallbackManagerForLLMRun,
    CallbackManagerForLLMRun,
)
import sys
import json


@dataclass(frozen=True)
class ChatGPTConfig:
    r"""Defines the parameters for generating chat completions using the
    OpenAI API.

    Args:
        temperature (float, optional): Sampling temperature to use, between
            :obj:`0` and :obj:`2`. Higher values make the output more random,
            while lower values make it more focused and deterministic.
            (default: :obj:`0.2`)
        top_p (float, optional): An alternative to sampling with temperature,
            called nucleus sampling, where the model considers the results of
            the tokens with top_p probability mass. So :obj:`0.1` means only
            the tokens comprising the top 10% probability mass are considered.
            (default: :obj:`1.0`)
        n (int, optional): How many chat completion choices to generate for
            each input message. (default: :obj:`1`)
        stream (bool, optional): If True, partial message deltas will be sent
            as data-only server-sent events as they become available.
            (default: :obj:`False`)
        stop (str or list, optional): Up to :obj:`4` sequences where the API
            will stop generating further tokens. (default: :obj:`None`)
        max_tokens (int, optional): The maximum number of tokens to generate
            in the chat completion. The total length of input tokens and
            generated tokens is limited by the model's context length.
            (default: :obj:`None`)
        presence_penalty (float, optional): Number between :obj:`-2.0` and
            :obj:`2.0`. Positive values penalize new tokens based on whether
            they appear in the text so far, increasing the model's likelihood
            to talk about new topics. See more information about frequency and
            presence penalties. (default: :obj:`0.0`)
        frequency_penalty (float, optional): Number between :obj:`-2.0` and
            :obj:`2.0`. Positive values penalize new tokens based on their
            existing frequency in the text so far, decreasing the model's
            likelihood to repeat the same line verbatim. See more information
            about frequency and presence penalties. (default: :obj:`0.0`)
        logit_bias (dict, optional): Modify the likelihood of specified tokens
            appearing in the completion. Accepts a json object that maps tokens
            (specified by their token ID in the tokenizer) to an associated
            bias value from :obj:`-100` to :obj:`100`. Mathematically, the bias
            is added to the logits generated by the model prior to sampling.
            The exact effect will vary per model, but values between :obj:`-1`
            and :obj:`1` should decrease or increase likelihood of selection;
            values like :obj:`-100` or :obj:`100` should result in a ban or
            exclusive selection of the relevant token. (default: :obj:`{}`)
        user (str, optional): A unique identifier representing your end-user,
            which can help OpenAI to monitor and detect abuse.
            (default: :obj:`""`)
    """
    temperature: float = 1.0  # openai default: 1.0
    top_p: float = 1.0
    max_in_tokens: int = 3200
    timeout: int = 20


def get_userid_and_token(
    url='http://avatar.aicubes.cn/vtuber/auth/api/oauth/v1/login',
    app_id='6027294018fd496693d0b8c77e2d20a1',
    app_secret='52806a6fff8a452497061b9dcc5779f4'
):
    d = {'app_id': app_id, 'app_secret': app_secret}
    h = {'Content-Type': 'application/json'}
    r = requests.post(url, json=d, headers=h)
    data = r.json()['data']
    return data['user_id'], data['token']


class ChatAPI:
    def __init__(self, timeout=20, verbose=False) -> None:
        self.timeout = timeout
        self.verbose = verbose
        self.user_id, self.token = get_userid_and_token()

    def create_chat_completion(self, messages: List[Dict[str, str]], model: str, temperature: float, max_tokens=None) -> str:
        res = self.create_chat_completion_response_data(messages, model, temperature, max_tokens)
        return res['choices'][0]['message']['content']

    def create_chat_completion_response_data(self, messages: List[Dict[str, str]], model: str, temperature: float, max_tokens=None):
        res = self.create_chat_completion_response(messages, model, temperature, max_tokens)
        res = res.json()['data']
        return res

    def create_chat_completion_response(self, messages: List[Dict[str, str]], model: str, temperature: float, max_tokens=None):
        chat_url = 'http://avatar.aicubes.cn/vtuber/ai_access/chatgpt/v1/chat/completions'
        chat_header = {
            'Content-Type': 'application/json',
            'userId': self.user_id,
            'token': self.token
        }
        payload = {
            'model': model,
            'messages': messages,
            'temperature': temperature,
            'max_tokens': max_tokens,
        }
        timeout = self.timeout
        res = requests.post(chat_url, json=payload, headers=chat_header, timeout=timeout)
        if self.verbose:
            data = res.json()["data"]
            if data is None:
                logger.debug(res.json())
            else:
                logger.debug(data["choices"][0]["message"]["content"])
        return res


class OpenAIChat(BaseLLM):
    """Wrapper around OpenAI Chat large language models.

    To use, you should have the ``openai`` python package installed, and the
    environment variable ``OPENAI_API_KEY`` set with your API key.

    Any parameters that are valid to be passed to the openai.create call can be passed
    in, even if not explicitly saved on this class.

    Example:
        .. code-block:: python

            from langchain.llms import OpenAIChat
            openaichat = OpenAIChat(model_name="gpt-3.5-turbo")
    """

    model_name: str = "gpt-3.5-turbo"
    """Model name to use."""
    model_kwargs: Dict[str, Any] = Field(default_factory=dict)
    """Holds any model parameters valid for `create` call not explicitly specified."""
    max_retries: int = 6
    """Maximum number of retries to make when generating."""
    prefix_messages: List = Field(default_factory=list)
    """Series of messages for Chat input."""
    streaming: bool = False
    """Whether to stream the results or not."""
    allowed_special: Union[Literal["all"], AbstractSet[str]] = set()
    """Set of special tokens that are allowed."""
    disallowed_special: Union[Literal["all"], Collection[str]] = "all"
    """Set of special tokens that are not allowed."""
    api = ChatAPI(timeout=60)
    generate_verbose: bool = False

    class Config:
        """Configuration for this pydantic object."""

        extra = Extra.ignore

    @root_validator(pre=True)
    def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]:
        """Build extra kwargs from additional params that were passed in."""
        all_required_field_names = {field.alias for field in cls.__fields__.values()}

        extra = values.get("model_kwargs", {})
        for field_name in list(values):
            if field_name not in all_required_field_names:
                if field_name in extra:
                    raise ValueError(f"Found {field_name} supplied twice.")
                extra[field_name] = values.pop(field_name)
        values["model_kwargs"] = extra
        return values

    @root_validator()
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that api key and python package exists in environment."""
        return values

    @property
    def _default_params(self) -> Dict[str, Any]:
        """Get the default parameters for calling OpenAI API."""
        return self.model_kwargs

    def _get_chat_params(
        self, prompts: List[str], stop: Optional[List[str]] = None
    ) -> Tuple:
        if len(prompts) > 1:
            raise ValueError(
                f"OpenAIChat currently only supports single prompt, got {prompts}"
            )
        messages = self.prefix_messages + [{"role": "user", "content": prompts[0]}]
        params: Dict[str, Any] = {**{"model": self.model_name}, **self._default_params}
        if stop is not None:
            if "stop" in params:
                raise ValueError("`stop` found in both the input and default params.")
            params["stop"] = stop
        if params.get("max_tokens") == -1:
            # for ChatGPT api, omitting max_tokens is equivalent to having no limit
            del params["max_tokens"]
        return messages, params

    def _generate(
        self,
        prompts: List[str],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
    ) -> LLMResult:
        messages, params = self._get_chat_params(prompts, stop)
        if self.generate_verbose:
            logger.debug(json.dumps(params, indent=2))
            for msg in messages:
                logger.debug(msg["role"] + " : " + msg["content"])
        resp = self.api.create_chat_completion_response_data(messages, self.model_name, self.model_kwargs['temperature'])
        full_response = resp
        llm_output = {
            "token_usage": full_response["usage"],
            "model_name": self.model_name,
        }
        return LLMResult(
            generations=[
                [Generation(text=full_response["choices"][0]["message"]["content"])]
            ],
            llm_output=llm_output,
        )

    async def _agenerate(
        self,
        prompts: List[str],
        stop: Optional[List[str]] = None,
        run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
    ) -> LLMResult:
        # messages, params = self._get_chat_params(prompts, stop)
        # full_response = await acompletion_with_retry(
        #     self, messages=messages, **params
        # )
        # llm_output = {
        #     "token_usage": full_response["usage"],
        #     "model_name": self.model_name,
        # }
        # return LLMResult(
        #     generations=[
        #         [Generation(text=full_response["choices"][0]["message"]["content"])]
        #     ],
        #     llm_output=llm_output,
        # )
        raise NotImplementedError("Async not supported for OpenAIChat")

    @property
    def _identifying_params(self) -> Mapping[str, Any]:
        """Get the identifying parameters."""
        return {**{"model_name": self.model_name}, **self._default_params}

    @property
    def _llm_type(self) -> str:
        """Return type of llm."""
        return "openai-chat"

    def get_num_tokens(self, text: str) -> int:
        """Calculate num tokens with tiktoken package."""
        # tiktoken NOT supported for Python < 3.8
        if sys.version_info[1] < 8:
            return super().get_num_tokens(text)
        try:
            import tiktoken
        except ImportError:
            raise ValueError(
                "Could not import tiktoken python package. "
                "This is needed in order to calculate get_num_tokens. "
                "Please install it with `pip install tiktoken`."
            )
        # create a GPT-3.5-Turbo encoder instance
        enc = tiktoken.encoding_for_model("gpt-3.5-turbo")

        # encode the text using the GPT-3.5-Turbo encoder
        tokenized_text = enc.encode(
            text,
            allowed_special=self.allowed_special,
            disallowed_special=self.disallowed_special,
        )

        # calculate the number of tokens in the encoded text
        return len(tokenized_text)


class ChatSession:
    def __init__(self, prompt: str = '', chatgpt_config: ChatGPTConfig = ChatGPTConfig()) -> None:
        self.chatgpt_config = chatgpt_config.__dict__
        self.user_id, self.token = self.get_userid_and_token()
        encoding = tiktoken.encoding_for_model("gpt-3.5-turbo-0301")
        self.count = lambda x: len(encoding.encode(x))
        self.history = []
        self.system = [self.make_msg("system", prompt)] if prompt else []

    def restart(self, prompt: str = '') -> None:
        self.system = [self.make_msg("system", prompt)] if prompt else []

    @staticmethod
    def make_msg(role: str, msg: str) -> Dict:
        assert role in {"system", "assistant", "user"}
        return {"role": role, "content": msg}

    @staticmethod
    def get_userid_and_token(
        url='http://avatar.aicubes.cn/vtuber/auth/api/oauth/v1/login',
        app_id='6027294018fd496693d0b8c77e2d20a1',
        app_secret='52806a6fff8a452497061b9dcc5779f4'
    ):
        d = {'app_id': app_id, 'app_secret': app_secret}
        h = {'Content-Type': 'application/json'}
        r = requests.post(url, json=d, headers=h)
        data = r.json()['data']
        return data['user_id'], data['token']

    def make_chat_session(self, user_id: str, token: str, input_message: List[Dict[str, str]]):
        chat_h = {
            'Content-Type': 'application/json',
            'userId': user_id,
            'token': token
        }
        chat_url = 'http://avatar.aicubes.cn/vtuber/ai_access/chatgpt/v1/chat/completions'
        res = requests.post(chat_url, json={
            'messages': input_message, **self.chatgpt_config
        }, headers=chat_h, timeout=self.chatgpt_config['timeout'])
        return res.json()['data']['choices'][0]['message']['content']

    def create_chat_completion(self, messages: List[Dict[str, str]], model: str, temperature: float, max_tokens=None) -> str:
        chat_url = 'http://avatar.aicubes.cn/vtuber/ai_access/chatgpt/v1/chat/completions'
        chat_header = {
            'Content-Type': 'application/json',
            'userId': self.user_id,
            'token': self.token
        }
        payload = {
            'model': model,
            'messages': messages,
            'temperature': temperature,
            'max_tokens': max_tokens,
        }
        timeout = self.chatgpt_config['timeout']
        res = requests.post(chat_url, json=payload, headers=chat_header, timeout=timeout)
        return res.json()['data']['choices'][0]['message']['content']

    def chat(self, msg: str):
        self.history.append(self.make_msg("user", msg))
        # walk backwards through history until the token budget is exhausted,
        # so the request always fits within max_in_tokens
        init_tokenCnt = self.count(self.system[0]['content']) if self.system else 0
        inputStaMsgIdx, tokenCnt = len(self.history), init_tokenCnt
        while inputStaMsgIdx and (
                tokenCnt := tokenCnt + self.count(self.history[inputStaMsgIdx - 1]['content'])) < \
                self.chatgpt_config['max_in_tokens']:
            inputStaMsgIdx -= 1
        inputStaMsgIdx = inputStaMsgIdx if inputStaMsgIdx < len(self.history) else -1
        res = self.make_chat_session(self.user_id, self.token, self.system + self.history[inputStaMsgIdx:])
        self.history.append(self.make_msg("assistant", res))
        return res


def batch_chat(info_lst: List, request_num: int = 6) -> List:
    res = []
    pool = mp.Pool(processes=request_num)
    for id, res_text in tqdm(pool.imap(single_chat, info_lst), desc="Asking API", total=len(info_lst)):
        if res_text:
            res.append((id, res_text))

    return res


def single_chat(info: Dict) -> Tuple[int, str]:
    sess = ChatSession(info['sys'], info['config'])
    try:
        res = sess.chat(info['query'])
        return info['id'], res
    except Exception as e:
        print(e)
        return info['id'], ""


if __name__ == '__main__':

    sys_prompt = """
    You are a strict grader. I will give you an instruction and a reply to that instruction, and you need to examine the reply carefully and give a score. You may judge the reply from several angles, for example:
    whether it is accurate, thorough, and harmless, and whether it fully meets the requirements of the instruction, and so on. Scores fall into 5 levels: 1 point, completely unusable; 2 points, unusable but completes part of the instruction;
    3 points, usable but with obvious flaws; 4 points, usable with minor flaws; 5 points, usable with no flaws. Include your own reasoning as you work, and give the score at the end.
    Here is an example:
    User: \n\n<Instruction>Who is Jack Ma's wife?</Instruction>\n\n<Reply>Jack Ma's wife is Zhang Yingzhu.</Reply>
    Assistant: This reply is wrong: Jack Ma is the founder of Alibaba and his wife is Zhang Ying, so the reply is incorrect; therefore my score is [1 point].
    """

    aaa = """
    fq(xm, m) = (Wq xm) e^(imθ)
    fk(xn, n) = (Wk xn) e^(inθ)
    g(xm, xn, m - n) = Re[(Wq xm)(Wk xn)* e^(i(m-n)θ)]
    """
    prompt = 'User: \n\n<Instruction>How tall is Yao Ming?</Instruction>\n\n<Reply>18m</Reply>\nAssistant:'
    bbb = "The given equation defines a function g(xm, xn, m-n) in terms of two complex functions fq(xm, m) and fk(xn, n) and their corresponding Fourier coefficients Wq and Wk, respectively. The function g(xm, xn, m-n) takes the real part of the product of the two complex exponential terms with phase angles m-theta and n-theta, respectively, where theta is an arbitrary constant angle. The term (m-n)theta in the exponent indicates that the two exponential terms are shifted by a phase difference of (m-n)theta."
    session = ChatSession('Explain the meaning of the formula')
    # print(session.chat(aaa))
    print(session.chat("Who are you? Who created you? When does your knowledge end? You may give yourself a name; please tell me your name."))
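Note that `temperature` is not a declared field on `OpenAIChat`: the `build_extra` validator moves any extra constructor kwargs into `model_kwargs`, and `_generate` reads `self.model_kwargs['temperature']` back out, so a temperature must be supplied at construction time. A minimal usage sketch, assuming the avatar.aicubes.cn gateway is reachable and a LangChain version where `BaseLLM` exposes `__call__`:

    # hypothetical usage sketch; `temperature` lands in model_kwargs via build_extra
    from LLM import OpenAIChat

    llm = OpenAIChat(model_name="gpt-3.5-turbo", temperature=0.2)
    print(llm("Explain nucleus sampling in one sentence."))  # BaseLLM.__call__ -> _generate
    print(llm.get_num_tokens("hello world"))                 # counted with tiktoken

Also note the class attribute `api = ChatAPI(timeout=60)` performs the login request as soon as the module is imported.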
app copy.py
ADDED
@@ -0,0 +1,96 @@
import sqlite3
import huggingface_hub
import gradio as gr
import pandas as pd
import shutil
import os
import datetime
from apscheduler.schedulers.background import BackgroundScheduler


DB_FILE = "./app.db"

TOKEN = os.environ.get('HUB_TOKEN')
repo = huggingface_hub.Repository(
    local_dir="data",
    repo_type="dataset",
    clone_from="linxy/oh-my-words",
    use_auth_token=TOKEN
)
repo.git_pull()

# Set db to latest
shutil.copyfile("./data/app.db", DB_FILE)


# Create table if it doesn't already exist

db = sqlite3.connect(DB_FILE)
try:
    db.execute("SELECT * FROM reviews").fetchall()
    db.close()
except sqlite3.OperationalError:
    db.execute(
        '''
        CREATE TABLE reviews (id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
                              created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
                              name TEXT, review INTEGER, comments TEXT)
        ''')
    db.commit()
    db.close()


def get_latest_reviews(db: sqlite3.Connection):
    reviews = db.execute("SELECT * FROM reviews ORDER BY id DESC limit 10").fetchall()
    total_reviews = db.execute("Select COUNT(id) from reviews").fetchone()[0]
    reviews = pd.DataFrame(reviews, columns=["id", "date_created", "name", "review", "comments"])
    return reviews, total_reviews


def add_review(name: str, review: int, comments: str):
    db = sqlite3.connect(DB_FILE)
    cursor = db.cursor()
    cursor.execute("INSERT INTO reviews(name, review, comments) VALUES(?,?,?)", [name, review, comments])
    db.commit()
    reviews, total_reviews = get_latest_reviews(db)
    db.close()
    return reviews, total_reviews

def load_data():
    db = sqlite3.connect(DB_FILE)
    reviews, total_reviews = get_latest_reviews(db)
    db.close()
    return reviews, total_reviews


with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            name = gr.Textbox(label="Name", placeholder="What is your name?")
            review = gr.Radio(label="How satisfied are you with using gradio?", choices=[1, 2, 3, 4, 5])
            comments = gr.Textbox(label="Comments", lines=10, placeholder="Do you have any feedback on gradio?")
            submit = gr.Button(value="Submit Feedback")
        with gr.Column():
            with gr.Box():
                gr.Markdown("Most recently created 10 rows: See full dataset [here](https://huggingface.co/datasets/freddyaboulton/gradio-reviews)")
                data = gr.Dataframe()
                count = gr.Number(label="Total number of reviews")
    submit.click(add_review, [name, review, comments], [data, count])
    demo.load(load_data, None, [data, count])


def backup_db():
    shutil.copyfile(DB_FILE, "./data/reviews.db")
    db = sqlite3.connect(DB_FILE)
    reviews = db.execute("SELECT * FROM reviews").fetchall()
    pd.DataFrame(reviews).to_csv("./data/reviews.csv", index=False)
    print("updating db")
    repo.push_to_hub(blocking=False, commit_message=f"Updating data at {datetime.datetime.now()}")


scheduler = BackgroundScheduler()
scheduler.add_job(func=backup_db, trigger="interval", seconds=60)
scheduler.start()


demo.launch()
app.py
CHANGED
@@ -6,9 +6,10 @@ import shutil
 import os
 import datetime
 from apscheduler.schedulers.background import BackgroundScheduler
-
-
-DB_FILE = "./app.db"
+from database.database import DATABASE_FILE as DB_FILE
+from web import demo
+from loguru import logger
+from common.util import date_str
 
 TOKEN = os.environ.get('HUB_TOKEN')
 repo = huggingface_hub.Repository(
@@ -18,73 +19,20 @@ repo = huggingface_hub.Repository(
     use_auth_token=TOKEN
 )
 repo.git_pull()
-
+DATASET_FILE = f"./data/{DB_FILE}"
 # Set db to latest
-shutil.copyfile("./data/app.db", DB_FILE)
-
-
-# Create table if it doesn't already exist
-
-db = sqlite3.connect(DB_FILE)
-try:
-    db.execute("SELECT * FROM reviews").fetchall()
-    db.close()
-except sqlite3.OperationalError:
-    db.execute(
-        '''
-        CREATE TABLE reviews (id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,
-                              created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP NOT NULL,
-                              name TEXT, review INTEGER, comments TEXT)
-        ''')
-    db.commit()
-    db.close()
-
-
-def get_latest_reviews(db: sqlite3.Connection):
-    reviews = db.execute("SELECT * FROM reviews ORDER BY id DESC limit 10").fetchall()
-    total_reviews = db.execute("Select COUNT(id) from reviews").fetchone()[0]
-    reviews = pd.DataFrame(reviews, columns=["id", "date_created", "name", "review", "comments"])
-    return reviews, total_reviews
-
-
-def add_review(name: str, review: int, comments: str):
-    db = sqlite3.connect(DB_FILE)
-    cursor = db.cursor()
-    cursor.execute("INSERT INTO reviews(name, review, comments) VALUES(?,?,?)", [name, review, comments])
-    db.commit()
-    reviews, total_reviews = get_latest_reviews(db)
-    db.close()
-    return reviews, total_reviews
-
-def load_data():
-    db = sqlite3.connect(DB_FILE)
-    reviews, total_reviews = get_latest_reviews(db)
-    db.close()
-    return reviews, total_reviews
-
-
-with gr.Blocks() as demo:
-    with gr.Row():
-        with gr.Column():
-            name = gr.Textbox(label="Name", placeholder="What is your name?")
-            review = gr.Radio(label="How satisfied are you with using gradio?", choices=[1, 2, 3, 4, 5])
-            comments = gr.Textbox(label="Comments", lines=10, placeholder="Do you have any feedback on gradio?")
-            submit = gr.Button(value="Submit Feedback")
-        with gr.Column():
-            with gr.Box():
-                gr.Markdown("Most recently created 10 rows: See full dataset [here](https://huggingface.co/datasets/freddyaboulton/gradio-reviews)")
-                data = gr.Dataframe()
-                count = gr.Number(label="Total number of reviews")
-    submit.click(add_review, [name, review, comments], [data, count])
-    demo.load(load_data, None, [data, count])
-
+shutil.copyfile(DATASET_FILE, DB_FILE)
 
 def backup_db():
-    shutil.copyfile(DB_FILE, "./data/reviews.db")
-    db = sqlite3.connect(DB_FILE)
-    reviews = db.execute("SELECT * FROM reviews").fetchall()
-    pd.DataFrame(reviews).to_csv("./data/reviews.csv", index=False)
-    print("updating db")
+    shutil.copyfile(DB_FILE, DATASET_FILE)
+    # db = sqlite3.connect(DB_FILE)
+    # pd.DataFrame(db.execute("SELECT * FROM words").fetchall()).to_csv("./data/words.csv", index=False)
+    # print("save word.csv")
+    # pd.DataFrame(db.execute("SELECT * FROM book").fetchall()).to_csv("./data/book.csv", index=False)
+    # print("save book.csv")
+    # pd.DataFrame(db.execute("SELECT * FROM unit").fetchall()).to_csv("./data/unit.csv", index=False)
+    # print("save unit.csv")
+    # db.close()
     repo.push_to_hub(blocking=False, commit_message=f"Updating data at {datetime.datetime.now()}")
@@ -92,5 +40,14 @@ scheduler = BackgroundScheduler()
 scheduler.add_job(func=backup_db, trigger="interval", seconds=60)
 scheduler.start()
 
+# def load_data():
+#     db = sqlite3.connect(DB_FILE)
+#     reviews, total_reviews = get_latest_reviews(db)
+#     db.close()
+#     return reviews, total_reviews
+
+# demo.load(load_data, None, [data, count])
 
-demo.launch()
+if __name__ == "__main__":
+    logger.add(f"output/logs/web_{date_str}.log", rotation="1 day", retention="7 days", level="INFO")
+    demo.launch()
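After this change, persistence is a whole-file copy of the sqlite database in both directions, rather than per-table CSV exports. A sketch of the resulting paths, assuming DATABASE_FILE = "app.db" as defined in database/database.py below:

    from database.database import DATABASE_FILE as DB_FILE

    DATASET_FILE = f"./data/{DB_FILE}"   # "./data/app.db" inside the dataset repo clone
    # restore on startup:   shutil.copyfile(DATASET_FILE, DB_FILE)
    # backup every minute:  shutil.copyfile(DB_FILE, DATASET_FILE), then repo.push_to_hub(...)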
common/util.py
ADDED
@@ -0,0 +1,31 @@
from typing import *
from loguru import logger
from tqdm import tqdm
import pandas as pd
import datetime

import multiprocessing
from multiprocessing import Pool
cpu_num = multiprocessing.cpu_count()
logger.info(f"cpu_num: {cpu_num}")


date_str = datetime.datetime.now().strftime("%Y%m%d_%Hh%Mm%Ss")


def multiprocessing_mapping(
    mapping_func,
    items: List[Any],
    batch_size=1000,
    tmp_filepath=f"./output/multiprocessing_mapping_{date_str}_tmp.xlsx",
):
    pool = Pool(processes=cpu_num)
    total_rows: List[Dict[str, str]] = []
    for i in tqdm(range(0, len(items), batch_size)):
        new_rows = pool.map(mapping_func, items[i:i+batch_size])
        total_rows += new_rows
        # checkpoint accumulated results after each batch
        df = pd.DataFrame(total_rows)
        df.to_excel(tmp_filepath, index=False)
    pool.close()
    pool.join()
    return total_rows
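`multiprocessing_mapping` dispatches each batch through `Pool.map`, so `mapping_func` must be picklable (a module-level function, not a lambda). A hypothetical usage sketch:

    from common.util import multiprocessing_mapping

    def to_row(word: str) -> dict:          # must be defined at module level
        return {"word": word, "length": len(word)}

    rows = multiprocessing_mapping(to_row, ["alpha", "beta", "gamma"], batch_size=2)
    # rows == [{"word": "alpha", "length": 5}, {"word": "beta", "length": 4}, ...]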
database/__init__.py
ADDED
@@ -0,0 +1,11 @@
from .database import SessionLocal, engine, Base
from .schema import *
from .operation import *


def get_db():
    db = SessionLocal()
    try:
        yield db
    finally:
        db.close()
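`get_db` is a generator dependency in the FastAPI style; a sketch of driving it by hand outside a framework:

    from database import get_db

    db_gen = get_db()
    db = next(db_gen)      # opens a SessionLocal session
    try:
        pass               # run queries with `db` here
    finally:
        db_gen.close()     # unwinds the generator, hitting `finally: db.close()`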
database/constant.py
ADDED
@@ -0,0 +1,2 @@
email = "[email protected]"
password = "123456"
database/database.py
ADDED
@@ -0,0 +1,10 @@
from sqlalchemy import URL, create_engine
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

DATABASE_FILE = "app.db"
SQLALCHEMY_DATABASE_URL = f"sqlite:///./{DATABASE_FILE}"
engine = create_engine(SQLALCHEMY_DATABASE_URL, connect_args={"check_same_thread": False})
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

Base = declarative_base()
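`check_same_thread=False` lets the single sqlite file be shared across the threads a web server spawns; the engine alone does not create tables. A sketch of the usual SQLAlchemy bootstrap, assuming the models in schema.py inherit from this `Base`:

    from database.database import Base, engine

    Base.metadata.create_all(bind=engine)   # emits CREATE TABLE for all Base subclasses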
database/model.py
ADDED
@@ -0,0 +1,37 @@
from typing import List, Union

from pydantic import BaseModel


class ItemBase(BaseModel):
    title: str
    description: Union[str, None] = None


class ItemCreate(ItemBase):
    pass


class Item(ItemBase):
    id: int
    owner_id: int

    class Config:
        orm_mode = True


class UserBase(BaseModel):
    email: str


class UserCreate(UserBase):
    password: str


class User(UserBase):
    id: int
    is_active: bool
    items: List[Item] = []

    class Config:
        orm_mode = True
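With `orm_mode = True`, these pydantic models can be built straight from SQLAlchemy rows (pydantic v1 API); a sketch assuming a matching ORM `user` instance:

    from database.model import User

    user_out = User.from_orm(user)   # reads attributes instead of dict keys
    print(user_out.json())           # nested items serialize recursively via Item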
database/operation.py
ADDED
@@ -0,0 +1,704 @@
import datetime
from sqlalchemy.orm import Session
from pydantic import BaseModel
from typing import List, Optional, Tuple, Dict
from . import schema
from sqlalchemy import func, or_
from sqlalchemy.orm import aliased
from sqlalchemy.orm import Query
from collections import defaultdict


# # Create an alias for Address
# AddressAlias = aliased(Address)

# # Build a subquery
# subquery = session.query(
#     func.count(AddressAlias.id).label('address_count'),
#     AddressAlias.user_id
# ).group_by(AddressAlias.user_id).subquery()

# # Query with the subquery and an outer join
# users = session.query(User, subquery.c.address_count).\
#     outerjoin(subquery, User.id == subquery.c.user_id).\
#     order_by(User.id).all()

# for user, address_count in users:
#     print(f'User {user.name} has {address_count} addresses.')

# region User
class UserBase(BaseModel):
    email: str
    is_active: bool

class UserCreate(UserBase):
    pass

class UserUpdate(UserBase):
    hashed_password: Optional[str]  # This is optional because you may not always want to update the password

class User(UserBase):
    id: str

def create_user(db: Session, email: str, password: str):
    db_user = schema.User(email=email, is_active=True)
    db_user.set_password(password)
    db.add(db_user)
    db.commit()
    db.refresh(db_user)
    return db_user

def get_user(db: Session, user_id: str) -> Optional[schema.User]:
    return db.query(schema.User).filter(schema.User.id == user_id).first()

def get_user_by_email(db: Session, email: str) -> Optional[schema.User]:
    return db.query(schema.User).filter(schema.User.email == email).first()

def get_users(db: Session, skip: int = 0, limit: int = 10) -> List[schema.User]:
    return db.query(schema.User).offset(skip).limit(limit).all()

def get_all_users(db: Session) -> List[schema.User]:
    return db.query(schema.User).all()

def update_user(db: Session, user_id: str, user: UserUpdate):
    db_user = db.query(schema.User).filter(schema.User.id == user_id).first()
    if db_user:
        for key, value in user.dict(exclude_unset=True).items():
            setattr(db_user, key, value)
        db.commit()
        db.refresh(db_user)
    return db_user

def delete_user(db: Session, user_id: str):
    db_user = db.query(schema.User).filter(schema.User.id == user_id).first()
    if db_user:
        db.delete(db_user)
        db.commit()
    return db_user
# endregion

# region Item
class ItemBase(BaseModel):
    title: str
    description: str

class ItemCreate(ItemBase):
    pass

class ItemUpdate(ItemBase):
    pass

class Item(ItemBase):
    id: str
    owner_id: str

def create_item(db: Session, item: ItemCreate, owner_id: str):
    db_item = schema.Item(**item.dict(), owner_id=owner_id)
    db.add(db_item)
    db.commit()
    db.refresh(db_item)
    return db_item

def get_item(db: Session, item_id: str):
    return db.query(schema.Item).filter(schema.Item.id == item_id).first()

def get_items(db: Session, skip: int = 0, limit: int = 10):
    return db.query(schema.Item).offset(skip).limit(limit).all()

def update_item(db: Session, item_id: str, item: ItemUpdate):
    db_item = db.query(schema.Item).filter(schema.Item.id == item_id).first()
    if db_item:
        for key, value in item.dict(exclude_unset=True).items():
            setattr(db_item, key, value)
        db.commit()
        db.refresh(db_item)
    return db_item

def delete_item(db: Session, item_id: str):
    db_item = db.query(schema.Item).filter(schema.Item.id == item_id).first()
    if db_item:
        db.delete(db_item)
        db.commit()
    return db_item

# endregion

# region UserBook
class UserBookBase(BaseModel):
    owner_id: str
    book_id: str
    title: str
    random: bool
    batch_size: int
    memorizing_batch: str = ""

class UserBookCreate(UserBookBase):
    pass

class UserBookUpdate(UserBookBase):
    pass

class UserBook(UserBookBase):
    id: str

def create_user_book(db: Session, user_book: UserBookCreate):
    db_user_book = schema.UserBook(**user_book.dict())
    db.add(db_user_book)
    db.commit()
    db.refresh(db_user_book)
    return db_user_book

def get_user_book(db: Session, user_book_id: str) -> schema.UserBook:
    return db.query(schema.UserBook).filter(schema.UserBook.id == user_book_id).first()

def get_user_books_by_owner_id(db: Session, owner_id: str) -> List[schema.UserBook]:
    return db.query(schema.UserBook).filter(schema.UserBook.owner_id == owner_id).all()

def get_user_books(db: Session, skip: int = 0, limit: int = 10) -> List[schema.UserBook]:
    return db.query(schema.UserBook).offset(skip).limit(limit).all()

def update_user_book(db: Session, user_book_id: str, user_book: UserBookUpdate):
    db_user_book = db.query(schema.UserBook).filter(schema.UserBook.id == user_book_id).first()
    if db_user_book:
        for key, value in user_book.dict(exclude_unset=True).items():
            setattr(db_user_book, key, value)
        db.commit()
        db.refresh(db_user_book)
    return db_user_book

def update_user_book_memorizing_batch(db: Session, user_book_id: str, memorizing_batch: str):
    db_user_book = db.query(schema.UserBook).filter(schema.UserBook.id == user_book_id).first()
    if db_user_book:
        db_user_book.memorizing_batch = memorizing_batch
        db.commit()
        db.refresh(db_user_book)
    return db_user_book

def delete_user_book(db: Session, user_book_id: str):
    db_user_book = db.query(schema.UserBook).filter(schema.UserBook.id == user_book_id).first()
    if db_user_book:
        db.delete(db_user_book)
        db.commit()
    return db_user_book

# endregion

# region UserMemoryBatch
class UserMemoryBatchBase(BaseModel):
    user_book_id: str
    story: str
    translated_story: str
    batch_type: str = "新词"  # "new words"

class UserMemoryBatchCreate(UserMemoryBatchBase):
    pass

class UserMemoryBatchUpdate(UserMemoryBatchBase):
    pass

class UserMemoryBatch(UserMemoryBatchBase):
    id: str

def create_user_memory_batch(db: Session, memory_batch: UserMemoryBatchCreate):
    db_memory_batch = schema.UserMemoryBatch(**memory_batch.dict())
    db.add(db_memory_batch)
    db.commit()
    db.refresh(db_memory_batch)
    return db_memory_batch

def get_user_memory_batch(db: Session, memory_batch_id: str):
    return db.query(schema.UserMemoryBatch).filter(schema.UserMemoryBatch.id == memory_batch_id).first()

def get_user_memory_batchs(db: Session, skip: int = 0, limit: int = 10):
    return db.query(schema.UserMemoryBatch).offset(skip).limit(limit).all()

def get_user_memory_batches_by_user_book_id(db: Session, user_book_id: str) -> List[schema.UserMemoryBatch]:
    return db.query(schema.UserMemoryBatch).filter(
        schema.UserMemoryBatch.user_book_id == user_book_id
    ).order_by(schema.UserMemoryBatch.create_time).all()

def get_new_user_memory_batches_by_user_book_id(db: Session, user_book_id: str) -> List[schema.UserMemoryBatch]:
    return db.query(schema.UserMemoryBatch).filter(
        schema.UserMemoryBatch.user_book_id == user_book_id,
        schema.UserMemoryBatch.batch_type == "新词",  # "new words"
    ).order_by(schema.UserMemoryBatch.create_time).all()

def actions_infomation(db: Session, action_query: Query[schema.UserMemoryBatchAction]):
    distinct_actions = action_query.distinct().subquery()
    batches = db.query(schema.UserMemoryBatch).join(distinct_actions, distinct_actions.c.batch_id == schema.UserMemoryBatch.id).all()
    batch_id_to_batch = {batch.id: batch for batch in batches}
    batch_id_to_words = {batch.id: get_words_in_batch(db, batch.id) for batch in batches}
    return batches, batch_id_to_batch, batch_id_to_words

def get_user_memory_batch_history(db: Session, user_book_id: str):
    action_query = db.query(schema.UserMemoryBatchAction).filter(schema.UserMemoryBatchAction.action == "end").join(
        schema.UserMemoryBatch, schema.UserMemoryBatch.id == schema.UserMemoryBatchAction.batch_id
    ).filter(schema.UserMemoryBatch.user_book_id == user_book_id)
    actions = action_query.order_by(schema.UserMemoryBatchAction.create_time).all()

    distinct_actions = action_query.distinct().subquery()
    batches = db.query(schema.UserMemoryBatch).join(distinct_actions, distinct_actions.c.batch_id == schema.UserMemoryBatch.id).all()
    batch_id_to_batch = {batch.id: batch for batch in batches}
    batch_id_to_words = {batch.id: get_words_in_batch(db, batch.id) for batch in batches}
    batch_id_to_actions = {batch.id: get_user_memory_actions_in_batch(db, batch.id) for batch in batches}
    return actions, batch_id_to_batch, batch_id_to_words, batch_id_to_actions

def get_user_memory_batch_history_in_minutes(db: Session, user_book_id: str, minutes: int):
    action_query = db.query(schema.UserMemoryBatchAction).filter(schema.UserMemoryBatchAction.action == "end").join(
        schema.UserMemoryBatch, schema.UserMemoryBatch.id == schema.UserMemoryBatchAction.batch_id
    ).filter(
        schema.UserMemoryBatch.user_book_id == user_book_id,
        schema.UserMemoryBatchAction.create_time > datetime.datetime.now() - datetime.timedelta(minutes=minutes),
    ).limit(20)
    actions = action_query.order_by(schema.UserMemoryBatchAction.create_time).all()
    distinct_actions = action_query.distinct().subquery()
    batches = db.query(schema.UserMemoryBatch).join(distinct_actions, distinct_actions.c.batch_id == schema.UserMemoryBatch.id).all()
    batch_id_to_batch = {batch.id: batch for batch in batches}
    batch_id_to_words = {batch.id: get_words_in_batch(db, batch.id) for batch in batches}
    # batch_actions = db.query(schema.UserMemoryBatchAction).join(distinct_actions, distinct_actions.c.batch_id == schema.UserMemoryBatchAction.batch_id).all()
    # return actions, batch_id_to_batch, batch_id_to_words, batch_actions
    return actions, batch_id_to_batch, batch_id_to_words


def get_user_memory_word_history_in_minutes(db: Session, user_book_id: str, minutes: int):
    action_query = db.query(schema.UserMemoryBatchAction).filter(schema.UserMemoryBatchAction.action == "end").join(
        schema.UserMemoryBatch, schema.UserMemoryBatch.id == schema.UserMemoryBatchAction.batch_id
    ).filter(
        schema.UserMemoryBatch.user_book_id == user_book_id,
        # schema.UserMemoryBatchAction.create_time > datetime.datetime.now() - datetime.timedelta(minutes=minutes),
    ).limit(200)
    distinct_actions = action_query.distinct().subquery()
    batches = db.query(schema.UserMemoryBatch).join(distinct_actions, distinct_actions.c.batch_id == schema.UserMemoryBatch.id).all()
    words = [get_words_in_batch(db, batch.id) for batch in batches]
    words = sum(words, [])
    return words

def update_user_memory_batch(db: Session, memory_batch_id: str, memory_batch: UserMemoryBatchUpdate):
    db_memory_batch = db.query(schema.UserMemoryBatch).filter(schema.UserMemoryBatch.id == memory_batch_id).first()
    if db_memory_batch:
        for key, value in memory_batch.dict(exclude_unset=True).items():
            setattr(db_memory_batch, key, value)
        db.commit()
        db.refresh(db_memory_batch)
    return db_memory_batch

def delete_user_memory_batch(db: Session, memory_batch_id: str):
    db_memory_batch = db.query(schema.UserMemoryBatch).filter(schema.UserMemoryBatch.id == memory_batch_id).first()
    if db_memory_batch:
        db.delete(db_memory_batch)
        db.commit()
    return db_memory_batch

# endregion

# region UserMemoryBatchAction
class UserMemoryBatchActionBase(BaseModel):
    batch_id: str
    action: str

class UserMemoryBatchActionCreate(UserMemoryBatchActionBase):
    pass

class UserMemoryBatchActionUpdate(UserMemoryBatchActionBase):
    pass

class UserMemoryBatchAction(UserMemoryBatchActionBase):
    id: str
    create_time: str
    update_time: str

def create_user_memory_batch_action(db: Session, memory_batch_action: UserMemoryBatchActionCreate):
    db_memory_batch_action = schema.UserMemoryBatchAction(**memory_batch_action.dict())
    db.add(db_memory_batch_action)
    db.commit()
    db.refresh(db_memory_batch_action)
    return db_memory_batch_action

def get_user_memory_batch_action(db: Session, memory_batch_action_id: str):
    return db.query(schema.UserMemoryBatchAction).filter(schema.UserMemoryBatchAction.id == memory_batch_action_id).first()

def get_user_memory_batch_actions(db: Session, skip: int = 0, limit: int = 10) -> List[schema.UserMemoryBatchAction]:
    return db.query(schema.UserMemoryBatchAction).offset(skip).limit(limit).all()

def get_user_memory_batch_actions_by_user_memory_batch_id(db: Session, user_memory_batch_id: str) -> List[schema.UserMemoryBatchAction]:
    return db.query(schema.UserMemoryBatchAction).filter(schema.UserMemoryBatchAction.batch_id == user_memory_batch_id).all()

def get_actions_at_each_batch(db: Session, memory_batch_ids: List[str]) -> List[schema.UserMemoryBatchAction]:
    return db.query(schema.UserMemoryBatchAction).filter(schema.UserMemoryBatchAction.batch_id.in_(memory_batch_ids)).all()

def get_finished_actions_at_each_batch(db: Session, memory_batch_ids: List[str]) -> List[schema.UserMemoryBatchAction]:
    return db.query(schema.UserMemoryBatchAction).filter(
        schema.UserMemoryBatchAction.batch_id.in_(memory_batch_ids),
        schema.UserMemoryBatchAction.action == "end",
    ).order_by(schema.UserMemoryBatchAction.create_time).all()

def update_user_memory_batch_action(db: Session, memory_batch_action_id: str, memory_batch_action: UserMemoryBatchActionUpdate):
    db_memory_batch_action = db.query(schema.UserMemoryBatchAction).filter(schema.UserMemoryBatchAction.id == memory_batch_action_id).first()
    if db_memory_batch_action:
        for key, value in memory_batch_action.dict(exclude_unset=True).items():
            setattr(db_memory_batch_action, key, value)
        db.commit()
        db.refresh(db_memory_batch_action)
    return db_memory_batch_action

def delete_user_memory_batch_action(db: Session, memory_batch_action_id: str):
    db_memory_batch_action = db.query(schema.UserMemoryBatchAction).filter(schema.UserMemoryBatchAction.id == memory_batch_action_id).first()
    if db_memory_batch_action:
        db.delete(db_memory_batch_action)
        db.commit()
    return db_memory_batch_action
# endregion

# region UserMemoryBatchGenerationHistory
class UserMemoryBatchGenerationHistoryBase(BaseModel):
    batch_id: str
    story: str
    translated_story: str

class UserMemoryBatchGenerationHistoryCreate(UserMemoryBatchGenerationHistoryBase):
    pass

class UserMemoryBatchGenerationHistoryUpdate(UserMemoryBatchGenerationHistoryBase):
    pass

class UserMemoryBatchGenerationHistory(UserMemoryBatchGenerationHistoryBase):
    id: str
    create_time: str
    update_time: str

def create_user_memory_batch_generation_history(db: Session, memory_batch_generation_history: UserMemoryBatchGenerationHistoryCreate):
    db_memory_batch_generation_history = schema.UserMemoryBatchGenerationHistory(**memory_batch_generation_history.dict())
    db.add(db_memory_batch_generation_history)
    db.commit()
    db.refresh(db_memory_batch_generation_history)
    return db_memory_batch_generation_history

def get_user_memory_batch_generation_history(db: Session, memory_batch_generation_history_id: str):
    return db.query(schema.UserMemoryBatchGenerationHistory).filter(schema.UserMemoryBatchGenerationHistory.id == memory_batch_generation_history_id).first()

def get_user_memory_batch_generation_historys(db: Session, skip: int = 0, limit: int = 10) -> List[schema.UserMemoryBatchGenerationHistory]:
    return db.query(schema.UserMemoryBatchGenerationHistory).offset(skip).limit(limit).all()

def get_user_memory_batch_generation_historys_by_user_memory_batch_id(db: Session, user_memory_batch_id: str) -> List[schema.UserMemoryBatchGenerationHistory]:
    return db.query(schema.UserMemoryBatchGenerationHistory).filter(schema.UserMemoryBatchGenerationHistory.batch_id == user_memory_batch_id).all()

def get_generation_historys_at_each_batch(db: Session, memory_batch_ids: List[str]) -> List[schema.UserMemoryBatchGenerationHistory]:
    return db.query(schema.UserMemoryBatchGenerationHistory).filter(schema.UserMemoryBatchGenerationHistory.batch_id.in_(memory_batch_ids)).all()

def get_generation_hostorys_by_user_book_id(db: Session, user_book_id: str) -> Dict[str, Tuple[List[schema.Word], List[schema.UserMemoryBatchGenerationHistory]]]:
    batches = get_user_memory_batches_by_user_book_id(db, user_book_id)
    batch_ids = [batch.id for batch in batches]
    batch_id_to_words_and_history = {}
    for batch_id in batch_ids:
        historys = get_user_memory_batch_generation_historys_by_user_memory_batch_id(db, batch_id)
        if len(historys) == 0:
            continue
        words = get_words_in_batch(db, batch_id)
        batch_id_to_words_and_history[batch_id] = (words, historys)
    return batch_id_to_words_and_history

def update_user_memory_batch_generation_history(db: Session, memory_batch_generation_history_id: str, memory_batch_generation_history: UserMemoryBatchGenerationHistoryUpdate):
    db_memory_batch_generation_history = db.query(schema.UserMemoryBatchGenerationHistory).filter(schema.UserMemoryBatchGenerationHistory.id == memory_batch_generation_history_id).first()
    if db_memory_batch_generation_history:
        for key, value in memory_batch_generation_history.dict(exclude_unset=True).items():
            setattr(db_memory_batch_generation_history, key, value)
        db.commit()
        db.refresh(db_memory_batch_generation_history)
    return db_memory_batch_generation_history

def delete_user_memory_batch_generation_history(db: Session, memory_batch_generation_history_id: str):
    db_memory_batch_generation_history = db.query(schema.UserMemoryBatchGenerationHistory).filter(schema.UserMemoryBatchGenerationHistory.id == memory_batch_generation_history_id).first()
    if db_memory_batch_generation_history:
        db.delete(db_memory_batch_generation_history)
        db.commit()
    return db_memory_batch_generation_history

# endregion

# region UserMemoryWord
class UserMemoryWordBase(BaseModel):
    batch_id: str
    word_id: str

class UserMemoryWordCreate(UserMemoryWordBase):
    pass

class UserMemoryWordUpdate(UserMemoryWordBase):
    pass

class UserMemoryWord(UserMemoryWordBase):
    id: str

def create_user_memory_word(db: Session, memory_word: UserMemoryWordCreate):
    db_memory_word = schema.UserMemoryWord(**memory_word.dict())
    db.add(db_memory_word)
    db.commit()
    db.refresh(db_memory_word)
    return db_memory_word

def get_user_memory_word(db: Session, memory_word_id: str):
    return db.query(schema.UserMemoryWord).filter(schema.UserMemoryWord.id == memory_word_id).first()

def get_user_memory_words(db: Session, skip: int = 0, limit: int = 10) -> List[schema.UserMemoryWord]:
    return db.query(schema.UserMemoryWord).offset(skip).limit(limit).all()

def get_user_memory_words_by_batch_id(db: Session, batch_id: str) -> List[schema.UserMemoryWord]:
    return db.query(schema.UserMemoryWord).filter(schema.UserMemoryWord.batch_id == batch_id).all()

def update_user_memory_word(db: Session, memory_word_id: str, memory_word: UserMemoryWordUpdate):
    db_memory_word = db.query(schema.UserMemoryWord).filter(schema.UserMemoryWord.id == memory_word_id).first()
    if db_memory_word:
        for key, value in memory_word.dict(exclude_unset=True).items():
            setattr(db_memory_word, key, value)
        db.commit()
        db.refresh(db_memory_word)
    return db_memory_word

def delete_user_memory_word(db: Session, memory_word_id: str):
    db_memory_word = db.query(schema.UserMemoryWord).filter(schema.UserMemoryWord.id == memory_word_id).first()
    if db_memory_word:
        db.delete(db_memory_word)
        db.commit()
    return db_memory_word

# endregion

# region UserMemoryAction
class UserMemoryActionBase(BaseModel):
    batch_id: str
    word_id: str
    action: str

class UserMemoryActionCreate(UserMemoryActionBase):
    pass

class UserMemoryActionUpdate(UserMemoryActionBase):
    pass

class UserMemoryAction(UserMemoryActionBase):
    id: str
    create_time: str
    update_time: str


def create_user_memory_action(db: Session, memory_action: UserMemoryActionCreate):
    db_memory_action = schema.UserMemoryAction(**memory_action.dict())
    db.add(db_memory_action)
    db.commit()
    db.refresh(db_memory_action)
    return db_memory_action

def get_user_memory_action(db: Session, memory_action_id: str) -> schema.UserMemoryAction:
    return db.query(schema.UserMemoryAction).filter(schema.UserMemoryAction.id == memory_action_id).first()
|
494 |
+
def get_user_memory_actions(db: Session, skip: int = 0, limit: int = 10)-> List[schema.UserMemoryAction]:
|
495 |
+
return db.query(schema.UserMemoryAction).offset(skip).limit(limit).all()
|
496 |
+
|
497 |
+
def get_user_memory_actions_by_word_id(db: Session, word_id: str)-> List[schema.UserMemoryAction]:
|
498 |
+
return db.query(schema.UserMemoryAction).filter(schema.UserMemoryAction.word_id == word_id).all()
|
499 |
+
|
500 |
+
def get_actions_at_each_word(db: Session, word_ids: List[str]) -> List[schema.UserMemoryAction]:
|
501 |
+
return db.query(schema.UserMemoryAction).filter(schema.UserMemoryAction.word_id.in_(word_ids)).all()
|
502 |
+
|
503 |
+
def get_user_memory_actions_in_batch(db: Session, batch_id: str) -> List[schema.UserMemoryAction]:
|
504 |
+
return db.query(schema.UserMemoryAction).filter(schema.UserMemoryAction.batch_id == batch_id).all()
|
505 |
+
|
506 |
+
def update_user_memory_action(db: Session, memory_action_id: str, memory_action: UserMemoryActionUpdate):
|
507 |
+
db_memory_action = db.query(schema.UserMemoryAction).filter(schema.UserMemoryAction.id == memory_action_id).first()
|
508 |
+
if db_memory_action:
|
509 |
+
for key, value in memory_action.dict(exclude_unset=True).items():
|
510 |
+
setattr(db_memory_action, key, value)
|
511 |
+
db.commit()
|
512 |
+
db.refresh(db_memory_action)
|
513 |
+
return db_memory_action
|
514 |
+
|
515 |
+
def delete_user_memory_action(db: Session, memory_action_id: str):
|
516 |
+
db_memory_action = db.query(schema.UserMemoryAction).filter(schema.UserMemoryAction.id == memory_action_id).first()
|
517 |
+
if db_memory_action:
|
518 |
+
db.delete(db_memory_action)
|
519 |
+
db.commit()
|
520 |
+
return db_memory_action
|
521 |
+
|
522 |
+
# endregion
|
523 |
+
|
524 |
+
# region Book
|
525 |
+
class BookBase(BaseModel):
|
526 |
+
bk_order: float = 0
|
527 |
+
bk_name: str
|
528 |
+
bk_item_num: int
|
529 |
+
bk_author: str = ""
|
530 |
+
bk_comment: str = ""
|
531 |
+
bk_organization: str = ""
|
532 |
+
bk_publisher: str = ""
|
533 |
+
bk_version: str = ""
|
534 |
+
permission: str = "private"
|
535 |
+
creator: str
|
536 |
+
|
537 |
+
class BookCreate(BookBase):
|
538 |
+
pass
|
539 |
+
|
540 |
+
class BookUpdate(BookBase):
|
541 |
+
pass
|
542 |
+
|
543 |
+
class Book(BookBase):
|
544 |
+
bk_id: str
|
545 |
+
|
546 |
+
def create_book(db: Session, book: BookCreate):
|
547 |
+
db_book = schema.Book(**book.dict())
|
548 |
+
db.add(db_book)
|
549 |
+
db.commit()
|
550 |
+
db.refresh(db_book)
|
551 |
+
return db_book
|
552 |
+
|
553 |
+
def get_book(db: Session, book_id: str):
|
554 |
+
return db.query(schema.Book).filter(schema.Book.bk_id == book_id).first()
|
555 |
+
|
556 |
+
def get_book_by_name(db: Session, book_name: str):
|
557 |
+
return db.query(schema.Book).filter(schema.Book.bk_name == book_name).first()
|
558 |
+
|
559 |
+
def get_books(db: Session, skip: int = 0, limit: int = 10):
|
560 |
+
return db.query(schema.Book).offset(skip).limit(limit).all()
|
561 |
+
|
562 |
+
def get_all_books(db: Session):
|
563 |
+
return db.query(schema.Book).all()
|
564 |
+
|
565 |
+
def get_all_books_for_user(db: Session, user_id: str):
|
566 |
+
return db.query(schema.Book).filter(
|
567 |
+
or_(schema.Book.creator == user_id, schema.Book.permission == "public")
|
568 |
+
).order_by(schema.Book.permission, schema.Book.create_time).all()
|
569 |
+
|
570 |
+
def get_book_count(db: Session):
|
571 |
+
return db.query(schema.Book).count()
|
572 |
+
|
573 |
+
def update_book(db: Session, book_id: str, book: BookUpdate):
|
574 |
+
db_book = db.query(schema.Book).filter(schema.Book.bk_id == book_id).first()
|
575 |
+
if db_book:
|
576 |
+
for key, value in book.dict(exclude_unset=True).items():
|
577 |
+
setattr(db_book, key, value)
|
578 |
+
db.commit()
|
579 |
+
db.refresh(db_book)
|
580 |
+
return db_book
|
581 |
+
|
582 |
+
def delete_book(db: Session, book_id: str):
|
583 |
+
db_book = db.query(schema.Book).filter(schema.Book.bk_id == book_id).first()
|
584 |
+
if db_book:
|
585 |
+
db.delete(db_book)
|
586 |
+
db.commit()
|
587 |
+
return db_book
|
588 |
+
|
589 |
+
# endregion
|
590 |
+
|
591 |
+
# region Unit
|
592 |
+
class UnitBase(BaseModel):
|
593 |
+
bv_book_id: str
|
594 |
+
bv_voc_id: str
|
595 |
+
bv_flag: int = 1
|
596 |
+
bv_tag: str = ""
|
597 |
+
bv_order: int = 1
|
598 |
+
|
599 |
+
class UnitCreate(UnitBase):
|
600 |
+
pass
|
601 |
+
|
602 |
+
class UnitUpdate(UnitBase):
|
603 |
+
pass
|
604 |
+
|
605 |
+
class Unit(UnitBase):
|
606 |
+
pass
|
607 |
+
|
608 |
+
def create_unit(db: Session, unit: UnitCreate):
|
609 |
+
db_unit = schema.Unit(**unit.dict())
|
610 |
+
db.add(db_unit)
|
611 |
+
db.commit()
|
612 |
+
db.refresh(db_unit)
|
613 |
+
return db_unit
|
614 |
+
|
615 |
+
def get_unit(db: Session, unit_id: str):
|
616 |
+
return db.query(schema.Unit).filter(schema.Unit.bv_id == unit_id).first()
|
617 |
+
|
618 |
+
def get_units(db: Session, skip: int = 0, limit: int = 10):
|
619 |
+
return db.query(schema.Unit).offset(skip).limit(limit).all()
|
620 |
+
|
621 |
+
def update_unit(db: Session, unit_id: str, unit: UnitUpdate):
|
622 |
+
db_unit = db.query(schema.Unit).filter(schema.Unit.bv_id == unit_id).first()
|
623 |
+
if db_unit:
|
624 |
+
for key, value in unit.dict(exclude_unset=True).items():
|
625 |
+
setattr(db_unit, key, value)
|
626 |
+
db.commit()
|
627 |
+
db.refresh(db_unit)
|
628 |
+
return db_unit
|
629 |
+
|
630 |
+
def delete_unit(db: Session, unit_id: str):
|
631 |
+
db_unit = db.query(schema.Unit).filter(schema.Unit.bv_id == unit_id).first()
|
632 |
+
if db_unit:
|
633 |
+
db.delete(db_unit)
|
634 |
+
db.commit()
|
635 |
+
return db_unit
|
636 |
+
|
637 |
+
# endregion
|
638 |
+
|
639 |
+
# region Word
|
640 |
+
class WordBase(BaseModel):
|
641 |
+
vc_id: str
|
642 |
+
vc_vocabulary: str
|
643 |
+
vc_phonetic_uk: str
|
644 |
+
vc_phonetic_us: str
|
645 |
+
vc_frequency: float
|
646 |
+
vc_difficulty: float
|
647 |
+
vc_acknowledge_rate: float
|
648 |
+
|
649 |
+
class WordCreate(WordBase):
|
650 |
+
pass
|
651 |
+
|
652 |
+
class WordUpdate(WordBase):
|
653 |
+
pass
|
654 |
+
|
655 |
+
class Word(WordBase):
|
656 |
+
pass
|
657 |
+
|
658 |
+
|
659 |
+
def create_word(db: Session, word: WordCreate, unit_id: str):
    # WordBase already includes vc_id, so spreading **word.dict() together with
    # vc_id=unit_id would raise "got multiple values for keyword argument 'vc_id'";
    # override the field explicitly instead.
    data = word.dict()
    data["vc_id"] = unit_id
    db_word = schema.Word(**data)
    db.add(db_word)
    db.commit()
    db.refresh(db_word)
    return db_word

def get_word(db: Session, word_id: str):
    return db.query(schema.Word).filter(schema.Word.vc_id == word_id).first()

def get_words(db: Session, skip: int = 0, limit: int = 10) -> List[schema.Word]:
    return db.query(schema.Word).offset(skip).limit(limit).all()

def get_words_by_vocabulary(db: Session, vocabulary: List[str]) -> List[schema.Word]:
    return db.query(schema.Word).filter(schema.Word.vc_vocabulary.in_(vocabulary)).all()

def get_words_by_ids(db: Session, ids: List[str]) -> List[schema.Word]:
    return db.query(schema.Word).filter(schema.Word.vc_id.in_(ids)).all()

def get_words_in_batch(db: Session, batch_id: str) -> List[schema.Word]:
    return db.query(schema.Word).join(schema.UserMemoryWord, schema.UserMemoryWord.word_id == schema.Word.vc_id).filter(schema.UserMemoryWord.batch_id == batch_id).all()

def get_words_at_each_batch(db: Session, batch_ids: List[str]) -> List[schema.Word]:
    return db.query(schema.Word).join(schema.UserMemoryWord, schema.UserMemoryWord.word_id == schema.Word.vc_id).filter(schema.UserMemoryWord.batch_id.in_(batch_ids)).all()

def get_words_in_user_book(db: Session, user_book_id: str) -> List[schema.Word]:
    batches = get_new_user_memory_batches_by_user_book_id(db, user_book_id)
    batch_ids = [batch.id for batch in batches]
    return get_words_at_each_batch(db, batch_ids)

def update_word(db: Session, word_id: str, word: WordUpdate):
    db_word = db.query(schema.Word).filter(schema.Word.vc_id == word_id).first()
    if db_word:
        for key, value in word.dict(exclude_unset=True).items():
            setattr(db_word, key, value)
        db.commit()
        db.refresh(db_word)
    return db_word

def delete_word(db: Session, word_id: str):
    db_word = db.query(schema.Word).filter(schema.Word.vc_id == word_id).first()
    if db_word:
        db.delete(db_word)
        db.commit()
    return db_word
# endregion
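
A minimal sketch of how the CRUD helpers above compose (an illustration, not part of the commit; `SessionLocal` comes from database/database.py, which is not shown in this excerpt, and the ids are made up):

# sketch: create a batch, record its generation history, and attach one word
from database import SessionLocal
from database.operation import (
    UserMemoryBatchCreate, UserMemoryBatchGenerationHistoryCreate, UserMemoryWordCreate,
    create_user_memory_batch, create_user_memory_batch_generation_history, create_user_memory_word,
)

db = SessionLocal()
batch = create_user_memory_batch(db, UserMemoryBatchCreate(
    user_book_id="ub-1", story="...", translated_story="..."))  # "ub-1" is a placeholder id
create_user_memory_batch_generation_history(db, UserMemoryBatchGenerationHistoryCreate(
    batch_id=batch.id, story=batch.story, translated_story=batch.translated_story))
create_user_memory_word(db, UserMemoryWordCreate(batch_id=batch.id, word_id="w-1"))  # "w-1" is a placeholder id
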
database/schema.py
ADDED
@@ -0,0 +1,209 @@
from datetime import datetime
from sqlalchemy import Boolean, Column, ForeignKey, Integer, String, Float, DateTime
from sqlalchemy.orm import relationship

from .database import Base

import uuid
import bcrypt

# class ModelBase(Base):
#     __abstract__ = True
#     id = Column(String, primary_key=True, index=True, default=lambda: str(uuid.uuid4()))
#     created_at = Column(DateTime, default=db.func.current_timestamp())
#     updated_at = Column(DateTime,
#                         default=func.current_timestamp(),
#                         onupdate=func.current_timestamp())

class User(Base):
    __tablename__ = "users"

    id = Column(String, primary_key=True, index=True, default=lambda: str(uuid.uuid4()))
    email = Column(String, unique=True, index=True)
    encrypted_password = Column(String)
    is_active = Column(Boolean, default=True)

    items = relationship("Item", back_populates="owner")
    # SQL to reset a user's password to a known bcrypt hash:
    # $2b$12$huJdmqFPzWU.9rumd2wpSOZUnCJ0bufmA4vl5T9PDc7V.xLgWAqSu
    # UPDATE = "UPDATE users SET encrypted_password = '$2b$12$huJdmqFPzWU.9rumd2wpSOZUnCJ0bufmA4vl5T9PDc7V.xLgWAqSu' WHERE email = '[email protected]'"

    def set_password(self, password: str):
        hashed = bcrypt.hashpw(password.encode(), bcrypt.gensalt())
        self.encrypted_password = str(hashed, encoding='utf-8')

    def verify_password(self, password: str):
        return bcrypt.checkpw(password.encode(), bytes(self.encrypted_password, encoding='utf-8'))

    def __str__(self):
        return f"<User {self.email}>"

    def __repr__(self):
        return f"<User {self.email}>"

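# Round-trip sketch for the password helpers above (illustrative, not part of
# the original file; the credentials are made up). bcrypt salts each hash, so
# two calls to set_password yield different ciphertexts, while verify_password
# still matches the original plaintext:
#
#   u = User(email="demo@example.com")
#   u.set_password("s3cret")
#   assert u.verify_password("s3cret")
#   assert not u.verify_password("wrong")
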
class Item(Base):
    __tablename__ = "items"

    id = Column(String, primary_key=True, index=True, default=lambda: str(uuid.uuid4()))
    title = Column(String)
    description = Column(String)
    owner_id = Column(String, ForeignKey("users.id"))

    owner = relationship("User", back_populates="items")

# region Memorizing words
class UserBook(Base):
    __tablename__ = "user_book"

    id = Column(String, primary_key=True, index=True, default=lambda: str(uuid.uuid4()))
    owner_id = Column(String, ForeignKey("users.id"))
    book_id = Column(String, ForeignKey("book.bk_id"))

    title = Column(String)
    batch_size = Column(Integer, default=10)
    random = Column(Boolean, default=True)
    memorizing_batch = Column(String, default='')

class UserMemoryBatch(Base):
    __tablename__ = "user_memory_batch"

    id = Column(String, primary_key=True, index=True, default=lambda: str(uuid.uuid4()))
    user_book_id = Column(String, ForeignKey("user_book.id"))

    story = Column(String, default="")
    translated_story = Column(String, default="")
    batch_type = Column(String, default="新词")  # "新词" (new words), "回忆" (recall) or "复习" (review)

    create_time = Column(DateTime, default=datetime.now)
    update_time = Column(DateTime, onupdate=datetime.now, default=datetime.now)

    words = relationship("UserMemoryWord", back_populates="batch")

class UserMemoryBatchAction(Base):
    __tablename__ = "user_memory_batch_action"
    """
    Start/end timestamps of a memorization session; the span between them
    gives the memorization speed for the batch.
    """

    id = Column(String, primary_key=True, index=True, default=lambda: str(uuid.uuid4()))
    batch_id = Column(String, ForeignKey("user_memory_batch.id"))

    action = Column(String, default="start")  # start or end

    create_time = Column(DateTime, default=datetime.now)
    update_time = Column(DateTime, onupdate=datetime.now, default=datetime.now)


class UserMemoryBatchGenerationHistory(Base):
    __tablename__ = "user_memory_batch_generation_history"
    """
    One record per story generated for a batch, so regenerations are kept.
    """

    id = Column(String, primary_key=True, index=True, default=lambda: str(uuid.uuid4()))
    batch_id = Column(String, ForeignKey("user_memory_batch.id"))

    story = Column(String, default="")
    translated_story = Column(String, default="")
    create_time = Column(DateTime, default=datetime.now)
    update_time = Column(DateTime, onupdate=datetime.now, default=datetime.now)


class UserMemoryWord(Base):
    __tablename__ = "user_memory_word"

    id = Column(String, primary_key=True, index=True, default=lambda: str(uuid.uuid4()))
    batch_id = Column(String, ForeignKey("user_memory_batch.id"))
    word_id = Column(String, ForeignKey("word.vc_id"))

    batch = relationship("UserMemoryBatch", back_populates="words")

class UserMemoryAction(Base):
    __tablename__ = "user_memory_action"

    id = Column(String, primary_key=True, index=True, default=lambda: str(uuid.uuid4()))
    batch_id = Column(String, ForeignKey("user_memory_batch.id"))
    word_id = Column(String, ForeignKey("word.vc_id"))

    action = Column(String, default="remember")  # remember or forget
    create_time = Column(DateTime, default=datetime.now)
    update_time = Column(DateTime, onupdate=datetime.now, default=datetime.now)

# endregion

class Book(Base):
    __tablename__ = "book"

    # Sample record (values translated from Chinese):
    # {'bk_id': 'd645920e395fedad7bbbed0e',
    #  'bk_parent_id': '6512bd43d9caa6e02c990b0a',
    #  'bk_level': 2,
    #  'bk_order': 2.0,
    #  'bk_name': 'PEP Senior High English 1 (compulsory)',
    #  'bk_item_num': 315,
    #  'bk_direct_item_num': 315,
    #  'bk_author': 'Liu Daoyi',
    #  'bk_book': 'PEP standard experimental textbook for senior high school, English 1 (compulsory)',
    #  'bk_comment': 'Bold: key words and phrases of this unit; "△" marks curriculum words (mastery required); words without "△" are not required; many abbreviations, person names, place names and phrases appear (please hide them).',
    #  'bk_organization': "People's Education Press, Curriculum & Textbook Research Institute; English Curriculum & Textbook R&D Center",
    #  'bk_publisher': "People's Education Press",
    #  'bk_version': '2nd edition, January 2007',
    #  'bk_flag': 'default: 152; bold: 97; with △: 66'}
    bk_id = Column(String, primary_key=True, index=True, default=lambda: str(uuid.uuid4()))
    bk_order = Column(Float)
    bk_name = Column(String)
    bk_item_num = Column(Integer)
    bk_author = Column(String, default='')
    bk_comment = Column(String, default='')
    bk_organization = Column(String, default='')
    bk_publisher = Column(String, default='')
    bk_version = Column(String, default='')

    permission = Column(String, default='private')
    creator = Column(String, ForeignKey("users.id"))
    create_time = Column(DateTime, default=datetime.now)
    update_time = Column(DateTime, onupdate=datetime.now, default=datetime.now)

    def __str__(self):
        return f"{self.bk_name}\n [{self.bk_item_num} words, {self.bk_version}]"

    def __repr__(self):
        return f"<Book {self.bk_name}>"

class Unit(Base):
    __tablename__ = "unit"

    # ['bv_id', 'bv_book_id', 'bv_voc_id', 'bv_flag', 'bv_tag', 'bv_order']
    # Sample record:
    # {'bv_id': '58450c828958a37d5c10f763',
    #  'bv_book_id': 'd645920e395fedad7bbbed0e',
    #  'bv_voc_id': '57067b9ca172044907c615d7',
    #  'bv_flag': 4,
    #  'bv_tag': 'Unit 1',
    #  'bv_order': 1}
    bv_id = Column(String, primary_key=True, index=True, default=lambda: str(uuid.uuid4()))
    bv_voc_id = Column(String, ForeignKey("word.vc_id"))
    bv_book_id = Column(String, ForeignKey("book.bk_id"))
    bv_flag = Column(Integer)
    bv_tag = Column(String)
    bv_order = Column(Integer)

class Word(Base):
    __tablename__ = "word"

    # vc_id>vc_vocabulary>vc_phonetic_uk>vc_phonetic_us>vc_frequency>vc_difficulty>vc_acknowledge_rate
    # 57067c89a172044907c6698e>superspecies>[su:pərsˈpi:ʃi:z]>[supəsˈpiʃiz]>0.0>1>0.664122
    vc_id = Column(String, primary_key=True, index=True, default=lambda: str(uuid.uuid4()))
    vc_vocabulary = Column(String)
    vc_translation = Column(String)
    vc_phonetic_uk = Column(String)
    vc_phonetic_us = Column(String)
    vc_frequency = Column(Float)
    vc_difficulty = Column(Float)
    vc_acknowledge_rate = Column(Float)

    def __str__(self):
        return f"{self.vc_vocabulary} {self.vc_translation}\n[{self.vc_phonetic_uk}] [{self.vc_phonetic_us}]"

    def __repr__(self):
        return f"{self.vc_vocabulary} {self.vc_translation}\n[{self.vc_phonetic_uk}] [{self.vc_phonetic_us}]"
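
For orientation, a minimal bootstrap sketch for these models (it mirrors what web.py below actually does; the sample user is made up):

# sketch: create the tables, then store and verify one user
from database import SessionLocal, engine, Base
import database.schema as schema

Base.metadata.create_all(bind=engine)  # creates users, items, book, unit, word and the user_* tables
db = SessionLocal()
user = schema.User(email="demo@example.com")
user.set_password("s3cret")
db.add(user)
db.commit()
assert db.query(schema.User).filter(schema.User.email == "demo@example.com").first().verify_password("s3cret")
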
memorize.py
ADDED
@@ -0,0 +1,264 @@
from sqlalchemy.orm import Session
from typing import List, Tuple
from collections import defaultdict  # used by generate_next_batch below

from tqdm import tqdm
from database.operation import *
from database import schema
import random
from loguru import logger
import math


# Memorizing words
from story_agent import generate_story_and_translated_story
from common.util import date_str, multiprocessing_mapping


def get_words_for_book(db: Session, user_book: UserBook) -> List[schema.Word]:
    book = get_book(db, user_book.book_id)
    if book is None:
        logger.warning("book not found")
        return []
    q = db.query(schema.Word).join(schema.Unit, schema.Unit.bv_voc_id == schema.Word.vc_id)
    words = q.filter(schema.Unit.bv_book_id == book.bk_id).order_by(schema.Word.vc_difficulty).all()
    return words


def save_words_as_book(db: Session, user_id: str, words: List[schema.Word], title: str):
    book = create_book(db, BookCreate(bk_name=f"{title}(待学单词自动保存为单词书)", bk_item_num=len(words), creator=user_id))
    for i, word in tqdm(enumerate(words)):
        unit = UnitCreate(bv_book_id=book.bk_id, bv_voc_id=word.vc_id)
        db_unit = schema.Unit(**unit.dict())
        db.add(db_unit)
        if i % 500 == 0:
            db.commit()
    db.commit()
    return book

def save_batch_words(db: Session, i: int, user_book_id: str, batch_words: List[schema.Word]):
    batch_words_str_list = [word.vc_vocabulary for word in batch_words]
    # Only the first batch gets its story generated here. Later batches get
    # stories based on the user's memorization state, pre-generated 3 batches ahead.
    story, translated_story = generate_story_and_translated_story(batch_words_str_list)
    return save_batch_words_with_story(db, i, user_book_id, batch_words, story, translated_story)


def save_batch_words_with_story(db: Session, i: int, user_book_id: str, batch_words: List[schema.Word], story: str, translated_story: str):
    batch_words_str_list = [word.vc_vocabulary for word in batch_words]
    logger.info(f"{i}, {batch_words_str_list}\n{story}")
    user_memory_batch = create_user_memory_batch(db, UserMemoryBatchCreate(
        user_book_id=user_book_id,
        story=story,
        translated_story=translated_story
    ))
    create_user_memory_batch_generation_history(db, UserMemoryBatchGenerationHistoryCreate(
        batch_id=user_memory_batch.id,
        story=story,
        translated_story=translated_story
    ))
    for word in batch_words:
        memory_word = UserMemoryWordCreate(
            batch_id=user_memory_batch.id,
            word_id=word.vc_id
        )
        db_memory_word = schema.UserMemoryWord(**memory_word.dict())
        db.add(db_memory_word)
    db.commit()
    return user_memory_batch

async def async_save_batch_words(db: Session, i: int, user_book_id: str, batch_words: List[schema.Word]):
    save_batch_words(db, i, user_book_id, batch_words)

import asyncio
async def async_save_batch_words_list(db: Session, user_book_id: str, batch_words_list: List[List[schema.Word]]):
    for i, batch_words in enumerate(batch_words_list):
        asyncio.ensure_future(async_save_batch_words(db, i+1, user_book_id, batch_words))

def transform(batch_words: List[str]):
    story, translated_story = generate_story_and_translated_story(batch_words)
    return {
        "story": story,
        "translated_story": translated_story,
        "words": batch_words
    }

def save_batch_words_list(db: Session, user_book_id: str, batch_words_list: List[List[schema.Word]]):
    word_str_list = []
    for batch_words in batch_words_list:
        word_str_list.append([word.vc_vocabulary for word in batch_words])
    story_list = multiprocessing_mapping(transform, word_str_list, tmp_filepath=f"./output/logs/save_batch_words_list_{date_str}.xlsx")
    logger.info(f"story_list: {len(story_list)}")
    for i, (batch_words, story) in tqdm(enumerate(zip(batch_words_list, story_list))):
        save_batch_words_with_story(db, i, user_book_id, batch_words, story['story'], story['translated_story'])

def track(db: Session, user_book: schema.UserBook, words: List[schema.Word]):
    batch_size = user_book.batch_size
    logger.debug(f"{[w.vc_vocabulary for w in words]}")
    logger.debug(f"batch_size: {batch_size}")
    logger.debug(f"words count: {len(words)}")
    if user_book.random:
        random.shuffle(words)
    else:
        words.sort(key=lambda x: x.vc_frequency, reverse=True)  # sort by frequency; frequent words are easier to remember
    logger.debug(f"saving words as book")
    save_words_as_book(db, user_book.owner_id, words, user_book.title)
    logger.debug(f"saved words as book [{user_book.title}]")
    batch_words_list = []
    for i in range(0, len(words), batch_size):
        batch_words = words[i:i+batch_size]
        batch_words_list.append(batch_words)
    logger.debug(f"batch_words_list: {len(batch_words_list)}")
    if len(batch_words_list) == 0:
        return
    first_batch_words = batch_words_list[0]
    user_memory_batch = save_batch_words(db, 0, user_book.id, first_batch_words)
    user_book.memorizing_batch = user_memory_batch.id
    db.commit()
    save_batch_words_list(db, user_book.id, batch_words_list[1:])
    # asyncio.run(async_save_batch_words_list(db, user_book.id, batch_words_list[1:]))

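# Worked example of the batching in track() above, with made-up numbers:
# 25 unknown words and batch_size=10 slice into words[0:10], words[10:20],
# words[20:25], i.e. batches of 10, 10 and 5. Only the first batch gets its
# story generated synchronously (save_batch_words); the remaining batches go
# through save_batch_words_list, which fans out story generation via
# multiprocessing_mapping from common/util.py (not shown in this excerpt).
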
def remenber(db: Session, batch_id: str, word_id: str):
    return create_user_memory_action(db, UserMemoryActionCreate(
        batch_id=batch_id,
        word_id=word_id,
        action="remember"
    ))

def forget(db: Session, batch_id: str, word_id: str):
    return create_user_memory_action(db, UserMemoryActionCreate(
        batch_id=batch_id,
        word_id=word_id,
        action="forget"
    ))

def save_memorizing_word_action(db: Session, batch_id: str, actions: List[Tuple[str, str]]):
    """
    actions: [(word_id, remember | forget)]
    """
    for word_id, action in actions:
        memory_action = UserMemoryActionCreate(
            batch_id=batch_id,
            word_id=word_id,
            action=action
        )
        db_memory_action = schema.UserMemoryAction(**memory_action.dict())
        db.add(db_memory_action)
    db.commit()

def on_batch_start(db: Session, user_memory_batch_id: str):
    return create_user_memory_batch_action(db, UserMemoryBatchActionCreate(
        batch_id=user_memory_batch_id,
        action="start"
    ))

def on_batch_end(db: Session, user_memory_batch_id: str):
    return create_user_memory_batch_action(db, UserMemoryBatchActionCreate(
        batch_id=user_memory_batch_id,
        action="end"
    ))

# def generate_recall_batch(db: Session, user_book: schema.UserBook):
def generate_next_batch(db: Session, user_book: schema.UserBook,
                        minutes: int = 60, k: int = 3):
    # Generate the next batch: a recall batch ("回忆") or a review batch ("复习").
    # Returns None when the next batch should simply be a new-word batch ("新词").
    left_bound, right_bound = 0.3, 0.6
    user_book_id = user_book.id
    batch_size = user_book.batch_size
    # actions, batch_id_to_batch, batch_id_to_words = get_user_memory_batch_history_in_minutes(db, user_book_id, minutes)
    # memorizing_words = sum(list(batch_id_to_words.values()), [])
    memorizing_words = get_user_memory_word_history_in_minutes(db, user_book_id, minutes)
    if len(memorizing_words) < k * batch_size:
        # 1. Too few recently memorized words -> new-word batch
        logger.info("新词批")
        return None
    # Compute per-word memorization efficiency
    memory_actions = get_actions_at_each_word(db, [w.vc_id for w in memorizing_words])
    remember_count = defaultdict(int)
    forget_count = defaultdict(int)
    for a in memory_actions:
        if a.action == "remember":
            remember_count[a.word_id] += 1
        else:
            forget_count[a.word_id] += 1
    word_id_to_efficiency = {}
    for word in memorizing_words:
        total = remember_count[word.vc_id] + forget_count[word.vc_id]
        # guard against words with no recorded action yet (avoids ZeroDivisionError)
        efficiency = remember_count[word.vc_id] / total if total else 0.0
        word_id_to_efficiency[word.vc_id] = efficiency
    # list.sort() returns None, so sort first and then log the result
    logger.info(sorted([(w.vc_vocabulary, word_id_to_efficiency[w.vc_id]) for w in memorizing_words], key=lambda x: x[1]))
    if all([efficiency > right_bound for efficiency in word_id_to_efficiency.values()] + [count > 3 for count in remember_count.values()]):
        # 2. Efficiency is uniformly high -> new-word batch
        logger.info("新词批")
        return None
    forgot_word_ids = [word_id for word_id, efficiency in word_id_to_efficiency.items() if efficiency < left_bound]
    forgot_word_ids.sort(key=lambda x: word_id_to_efficiency[x])
    if len(forgot_word_ids) >= batch_size:
        # 4. Normal case with enough forgotten words -> review batch
        logger.info("复习批")
        batch_words = [word for word in memorizing_words if word.vc_id in forgot_word_ids][:batch_size]
        batch_words.sort(key=lambda x: x.vc_difficulty, reverse=True)
        batch_words_str_list = [word.vc_vocabulary for word in batch_words]
        story, translated_story = generate_story_and_translated_story(batch_words_str_list)
        user_memory_batch = create_user_memory_batch(db, UserMemoryBatchCreate(
            user_book_id=user_book_id,
            story=story,
            translated_story=translated_story,
            batch_type="复习",
        ))
        create_user_memory_batch_generation_history(db, UserMemoryBatchGenerationHistoryCreate(
            batch_id=user_memory_batch.id,
            story=story,
            translated_story=translated_story
        ))
        for word in batch_words:
            memory_word = UserMemoryWordCreate(
                batch_id=user_memory_batch.id,
                word_id=word.vc_id
            )
            db_memory_word = schema.UserMemoryWord(**memory_word.dict())
            db.add(db_memory_word)
        db.commit()
        return user_memory_batch
    unfarmiliar_word_ids = [word_id for word_id, efficiency in word_id_to_efficiency.items() if left_bound <= efficiency < right_bound]
    unfarmiliar_word_ids.sort(key=lambda x: word_id_to_efficiency[x])
    if len(unfarmiliar_word_ids) < batch_size:
        # also pull in words that have been remembered fewer than 3 times
        unfarmiliar_word_ids += [word_id for word_id, count in remember_count.items() if count < 3]
        unfarmiliar_word_ids.sort(key=lambda x: word_id_to_efficiency[x])
    if len(unfarmiliar_word_ids) >= batch_size:
        # 3. Efficiency too low -> recall batch
        logger.info("回忆批")
        batch_words = [word for word in memorizing_words if word.vc_id in unfarmiliar_word_ids][:batch_size]
        batch_words.sort(key=lambda x: x.vc_difficulty, reverse=True)
        batch_words_str_list = [word.vc_vocabulary for word in batch_words]
        story, translated_story = generate_story_and_translated_story(batch_words_str_list)
        user_memory_batch = create_user_memory_batch(db, UserMemoryBatchCreate(
            user_book_id=user_book_id,
            story=story,
            translated_story=translated_story,
            batch_type="回忆",
        ))
        create_user_memory_batch_generation_history(db, UserMemoryBatchGenerationHistoryCreate(
            batch_id=user_memory_batch.id,
            story=story,
            translated_story=translated_story
        ))
        for word in batch_words:
            memory_word = UserMemoryWordCreate(
                batch_id=user_memory_batch.id,
                word_id=word.vc_id
            )
            db_memory_word = schema.UserMemoryWord(**memory_word.dict())
            db.add(db_memory_word)
        db.commit()
        return user_memory_batch
    # 5. Normal case otherwise -> new-word batch
    logger.info("新词批")
    return None
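
The batch-type decision in generate_next_batch hinges on the per-word efficiency remember / (remember + forget), measured against (left_bound, right_bound) = (0.3, 0.6). A standalone sketch of that classification, with made-up counts:

left_bound, right_bound = 0.3, 0.6

def classify(remember: int, forget: int) -> str:
    total = remember + forget
    efficiency = remember / total if total else 0.0  # same zero-action guard as above
    if efficiency < left_bound:
        return "forgotten: candidate for a review batch (复习批)"
    if efficiency < right_bound:
        return "unfamiliar: candidate for a recall batch (回忆批)"
    return "well remembered: excluded from recall/review batches"

print(classify(1, 3))  # 0.25 -> review candidate
print(classify(2, 2))  # 0.50 -> recall candidate
print(classify(4, 1))  # 0.80 -> well remembered
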
output/logs/.gitkeep
ADDED
File without changes
story_agent.py
ADDED
@@ -0,0 +1,69 @@
import traceback
from typing import List, Tuple

from loguru import logger
from pydantic import BaseModel, Field
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.output_parsers import (
    PydanticOutputParser,
    OutputFixingParser,
)
# LLM.py wraps my own language model; you can use the OpenAI one directly instead:
# from langchain.llms.openai import OpenAIChat
from LLM import OpenAIChat

TEMPLATE = """\
please write a story at least 5 sentences long, using the words [{words}].

{format}

Attention! The words should be highlighted, surrounded by "`". Therefore, the story should be in the following format.
English: ... `word1` ... `word2` ...
Chinese: ... `单词1` ... `单词2` ...
"""

class Story(BaseModel):
    story: str = Field(description="the story")
    translated_story: str = Field(description="the translated story")

llm = OpenAIChat(model_name="gpt-3.5-turbo", temperature=0.3)
parser = PydanticOutputParser(pydantic_object=Story)
prompt_template = PromptTemplate(
    template=TEMPLATE,
    input_variables=["words"],
    partial_variables={
        "format": parser.get_format_instructions(),
    }
)
parser = OutputFixingParser.from_llm(parser=parser, llm=llm)
chain = LLMChain(
    llm=llm,
    prompt=prompt_template,
    output_parser=parser,
    verbose=False,
)

def tell_story(words: List[str]):
    count = 0
    while count < 10:
        count += 1
        try:
            resp: Story = chain.run(", ".join(words))
            if len(resp.story.strip()) == 0:
                continue
            if len(resp.translated_story.strip()) == 0:
                continue
            return resp
        except Exception as e:
            logger.error(e)
            logger.error(traceback.format_exc())
            logger.error("retrying...")
            continue
    return Story(story="", translated_story="")

def generate_story_and_translated_story(words: List[str]) -> Tuple[str, str]:
    resp = tell_story(words)
    return resp.story, resp.translated_story
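
A usage sketch for the module above (illustrative: it needs the model behind LLM.OpenAIChat and its API key to be reachable, and the sample words are made up):

from story_agent import generate_story_and_translated_story

story, translated = generate_story_and_translated_story(["apple", "river", "brave"])
print(story)       # English story with `apple`, `river`, `brave` highlighted in backticks
print(translated)  # Chinese translation with the same words highlighted
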
web.py
ADDED
@@ -0,0 +1,696 @@
from contextlib import contextmanager
import gradio as gr
from database.operation import *
from memorize import *
from database import SessionLocal, engine, Base
import database.schema as schema
from database import constant
import time
import asyncio

import pandas as pd
from collections import defaultdict
from datetime import datetime


Base.metadata.create_all(bind=engine)
db = SessionLocal()

@contextmanager
def session_scope():
    try:
        yield db
        db.commit()
    except Exception:
        db.rollback()
        raise
    finally:
        db.close()

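# Note: session_scope is defined here, but the handlers below talk to the
# module-level `db` directly. A minimal usage sketch of the context manager
# (commits on success, rolls back on error; the book fields are made up):
#
#   with session_scope() as s:
#       s.add(schema.Book(bk_name="demo", bk_item_num=0, creator="u-1"))
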
intro = """\
|
30 |
+
็ฎๆ ๅบๆฏ๏ผๅช่่่ฎฐไฝๅ่ฏๅๅ
ถๆๆ๏ผไฝฟๅพ่ฝๆ ้็ข้
่ฏป๏ผไธ่่็จไบๅไฝใ
|
31 |
+
|
32 |
+
ไธป่ฆๆณๆณ๏ผๆน้่ฎฐๅ่ฏ๏ผๆฏๆน n ไธชๅ่ฏ๏ผ่ฟ n ไธชๅ่ฏ็จ AI ็ๆๆ
ไบ๏ผๅค่ฟฐๆ
ไบๅณๅฏ่ฎฐไฝๅ่ฏใ
|
33 |
+
|
34 |
+
ไธบไปไน๏ผ
|
35 |
+
|
36 |
+
- ๆน้่ฎฐๅ่ฏ๏ผไธๆฌกๅฏไปฅ่ฎฐไฝ n ไธชๅ่ฏ๏ผ่ไธๆฏไธไธชไธไธช่ฎฐ๏ผๆ็้ซใ
|
37 |
+
- ๅค่ฟฐๆ
ไบ๏ผๅณ่ดนๆผๅญฆไน ๆณ๏ผๆ
ไบๆฏๅ่ฏ็่ฎฐๅฟไน้ใ
|
38 |
+
- ๅค่ฟฐๆ
ไบ่ไธๆฏๅค่ฟฐๅ่ฏ๏ผๆ
ไบๅ
ทๆ่ฟ็ปญๆง๏ผๆด็ฌฆๅไบบ็ฑปๅคฉๆง๏ผๅฎนๆ่ฎฐใ
|
39 |
+
|
40 |
+
### ไฝฟ็จๅปบ่ฎฎ
|
41 |
+
|
42 |
+
1. ่ฎฐๅ่ฏๅ๏ผๅ
ๅฎๆด่ฟไธ้ๅ
จ้จๅ่ฏ๏ผๅ้คๅทฒ่ฎฐไฝ็ๅ่ฏ๏ผไป่ๆ้ซๆฐ่ฏๅฏๅบฆ
|
43 |
+
2. ๅ
็ๅ่ฏ่กจๆ ผ๏ผ็ถๅ็่ฑๆๆ
ไบ๏ผๅฏน็
งไธญๆๅฎๆ่ฎฐๅฟ
|
44 |
+
3. ่ฎฐๅฟๅฎๆๅ้่ฆไธไธชไธไธชๅพ้ๅทฒ่ฎฐไฝ็ๅ่ฏ๏ผๅพ้ๆถๅฐ่ฏๅค่ฟฐๅ่ฏๆๆ๏ผไปฅๆญคๆฅๆฃ้ช่ฎฐๅฟๆๆ
|
45 |
+
|
46 |
+
> ๆฌ้กน็ฎๅบไบ[ๅผๆบๆฐๆฎ้](https://github.com/LinXueyuanStdio/DictionaryData)๏ผๅนถไธ[ๅผๆบไปฃ็ ](https://github.com/LinXueyuanStdio/oh-my-words)๏ผๆฌข่ฟๅคงๅฎถ่ดก็ฎไปฃ็ ๏ฝ
|
47 |
+
"""
|
48 |
+
|
49 |
+
with gr.Blocks(title="ๆน้่ฎฐๅ่ฏ") as demo:
|
50 |
+
# gr.Markdown("# ๆน้่ฎฐๅ่ฏ")
|
51 |
+
gr.HTML("<h1 align=\"center\">ๆน้่ฎฐๅ่ฏ</h1>")
|
52 |
+
user = gr.State(value={})
|
53 |
+
|
54 |
+
# 0. ็ปๅฝ
|
55 |
+
with gr.Tab("ไธป้กต"):
|
56 |
+
gr.Markdown(intro)
|
57 |
+
gr.Markdown(f"ๅ
ฑ {get_book_count(db)} ๆฌไนฆ")
|
58 |
+
gr.HTML("""<iframe src="https://ghbtns.com/github-btn.html?user=LinXueyuanStdio&repo=oh-my-words&type=star&count=true&size=small" frameborder="0" scrolling="0" width="170" height="30" title="GitHub"></iframe>""")
|
59 |
+
with gr.Row():
|
60 |
+
with gr.Column():
|
61 |
+
email = gr.TextArea(value=constant.email, lines=1, label="้ฎ็ฎฑ")
|
62 |
+
password = gr.TextArea(value=constant.password, lines=1, label="ๅฏ็ ")
|
63 |
+
login_btn = gr.Button("็ปๅฝ")
|
64 |
+
with gr.Column():
|
65 |
+
register_email = gr.TextArea(value='', lines=1, label="้ฎ็ฎฑ")
|
66 |
+
register_password = gr.TextArea(value='', lines=1, label="ๅฏ็ ")
|
67 |
+
register_btn = gr.Button("็ซๅณๆณจๅ", variant="primary")
|
68 |
+
user_status = gr.Textbox("", lines=1, label="็จๆท็ถๆ")
|
69 |
+
|
70 |
+
# 1. ๅๅปบ่ฎฐๅฟ่ฎกๅ
|
71 |
+
tab1 = gr.Tab("ๅๅปบ่ฎฐๅฟ่ฎกๅ", visible=False)
|
72 |
+
with tab1:
|
73 |
+
select_book = gr.Dropdown([], label="ๅ่ฏไนฆ", info="้ๆฉไธๆฌๅ่ฏไนฆ")
|
74 |
+
batch_size = gr.Number(value=10, label="ๆนๆฌกๅคงๅฐ")
|
75 |
+
randomize = gr.Checkbox(value=True, label="ไปฅๅ่ฏไนฑๅบ่ฟ่ก่ฎฐๅฟ")
|
76 |
+
title = gr.TextArea(value='ๅ่ฏไนฆ', lines=1, label="่ฎฐๅฟ่ฎกๅ็ๅ็งฐ")
|
77 |
+
btn = gr.Button("ๅๅปบ่ฎฐๅฟ่ฎกๅ")
|
78 |
+
status = gr.Textbox("", lines=1, label="็ถๆ")
|
79 |
+
|
80 |
+
def submit(user: Dict[str, str], book, title, randomize, batch_size):
|
81 |
+
user_id = user.get("id", None)
|
82 |
+
if user_id is None:
|
83 |
+
gr.Error("่ฏทๅ
็ปๅฝ")
|
84 |
+
return "่ฏทๅ
็ปๅฝ"
|
85 |
+
book_id = book.split(" [")[1][:-1]
|
86 |
+
user_book = create_user_book(db, UserBookCreate(
|
87 |
+
owner_id=user_id,
|
88 |
+
book_id=book_id,
|
89 |
+
title=title,
|
90 |
+
random=randomize,
|
91 |
+
batch_size=batch_size
|
92 |
+
))
|
93 |
+
if user_book is not None:
|
94 |
+
return "ๆๅ"
|
95 |
+
else:
|
96 |
+
return "ๅคฑ่ดฅ"
|
97 |
+
|
98 |
+
btn.click(submit, [user, select_book, title, randomize, batch_size], [status])
|
99 |
+
def on_select(user: Dict[str, str], evt: gr.SelectData):
|
100 |
+
user_id = user.get("id", None)
|
101 |
+
new_options = []
|
102 |
+
if user_id is None:
|
103 |
+
return gr.Dropdown(choices=new_options), "่ฏทๅ
็ปๅฝ"
|
104 |
+
books = get_all_books_for_user(db, user_id)
|
105 |
+
new_options = [f"{'โญ ' if book.permission == 'private' else ''}{book.bk_name} (ๅ
ฑ {book.bk_item_num} ่ฏ) [{book.bk_id}]" for book in books]
|
106 |
+
return gr.Dropdown(choices=new_options), f"ๆจๅฅฝ๏ผ{user['email']}"
|
107 |
+
tab1.select(on_select, [user], [select_book, status])
|
108 |
+
|
109 |
+
# 2. ้ๆฉๅ่ฏๅๆน
|
110 |
+
with gr.Tab("้ๆฉๅ่ฏๅๆน") as tab2:
|
111 |
+
select_user_book = gr.Dropdown(
|
112 |
+
[], label="่ฎฐๅฟ่ฎกๅ", info="่ฏท้ๆฉ่ฎฐๅฟ่ฎกๅ"
|
113 |
+
)
|
114 |
+
word_count = gr.Number(value=0, label="ๅ่ฏไธชๆฐ")
|
115 |
+
known_words = gr.CheckboxGroup(
|
116 |
+
[], label="ๅทฒๅญฆไผ็ๅ่ฏ", info="ๆญฃๅผ่ฎฐๅฟๅๅฐๅป้คๅทฒๅญฆไผ็ๅ่ฏ๏ผๆ้ซๆฏไธชๆนๆฌก็ๆฐ่ฏๅฏๅบฆ๏ผ่ฟ่ๆ้ซๆ็"
|
117 |
+
)
|
118 |
+
btn = gr.Button("็ๆๆนๆฌก")
|
119 |
+
status = gr.Textbox("3000 ่ฏๅคงๆฆ่ฆ 2 ๅฐๆถๆ่ฝๅๅฎๆๆ็ๆ
ไบ", lines=1, label="็ๆ็ปๆ")
|
120 |
+
|
121 |
+
def on_select_user(user):
|
122 |
+
user_id = user.get("id", None)
|
123 |
+
if user_id is None:
|
124 |
+
gr.Error("่ฏทๅ
็ปๅฝ")
|
125 |
+
return gr.Dropdown(choices=[]), "่ฏทๅ
็ปๅฝ"
|
126 |
+
new_options = []
|
127 |
+
user_book = get_user_books_by_owner_id(db, user_id)
|
128 |
+
new_options = [f"{book.title} | {book.batch_size}ไธชๅ่ฏไธ็ป [{book.id}]" for book in user_book]
|
129 |
+
return gr.Dropdown(choices=new_options), "3000 ่ฏๅคงๆฆ่ฆ 2 ๅฐๆถๆ่ฝๅๅฎๆๆ็ๆ
ไบ"
|
130 |
+
|
131 |
+
def on_select_user_book(user_book):
|
132 |
+
logger.debug(f'user_book {user_book}')
|
133 |
+
if user_book is None:
|
134 |
+
return 0, gr.CheckboxGroup(choices=[])
|
135 |
+
new_options = []
|
136 |
+
user_book_id = user_book.split(" [")[1][:-1]
|
137 |
+
user_book = get_user_book(db, user_book_id)
|
138 |
+
book_id = user_book.book_id
|
139 |
+
book = get_book(db, book_id)
|
140 |
+
if book is None:
|
141 |
+
return 0, gr.CheckboxGroup(choices=[])
|
142 |
+
words = get_words_for_book(db, user_book)
|
143 |
+
new_options = [f"{word.vc_vocabulary}" for word in words]
|
144 |
+
return len(words), gr.CheckboxGroup(choices=new_options)
|
145 |
+
|
146 |
+
select_user_book.select(on_select_user_book, inputs=[select_user_book], outputs=[word_count, known_words])
|
147 |
+
tab2.select(on_select_user, [user], [select_user_book, status])
|
148 |
+
|
149 |
+
def submit(user_book, known_words):
|
150 |
+
start_time = time.time()
|
151 |
+
user_book_id = user_book.split(" [")[1][:-1]
|
152 |
+
user_book = get_user_book(db, user_book_id)
|
153 |
+
all_words = get_words_for_book(db, user_book)
|
154 |
+
unknown_words = []
|
155 |
+
for w in all_words:
|
156 |
+
if w.vc_vocabulary not in known_words:
|
157 |
+
unknown_words.append(w)
|
158 |
+
track(db, user_book, unknown_words)
|
159 |
+
end_time = time.time()
|
160 |
+
duration = end_time - start_time
|
161 |
+
return f"ๆๅ๏ผๅไธบ {len(unknown_words) // user_book.batch_size} ไธชๆนๆฌก๏ผๅ
ฑ {len(unknown_words)} ไธชๅ่ฏ๏ผ่ๆถ {duration:.2f} ็ง"
|
162 |
+
|
163 |
+
btn.click(submit, [select_user_book, known_words], [status])
|
164 |
+
|
165 |
+
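    # Note on the dropdown encoding used throughout this file: options are rendered
    # as f"{title} | {batch_size}个单词一组 [{id}]" and the id is recovered with
    # option.split(" [")[1][:-1]. For example (made-up values),
    # "我的计划 | 10个单词一组 [abc123]" parses back to "abc123". This means titles
    # must not themselves contain the substring " [".
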
# 3. ่ฎฐๅฟ
|
166 |
+
with gr.Tab("่ฎฐๅฟ") as tab3:
|
167 |
+
select_user_book = gr.Dropdown(
|
168 |
+
[], label="่ฎฐๅฟ่ฎกๅ", info="่ฏท้ๆฉ่ฎฐๅฟ่ฎกๅ"
|
169 |
+
)
|
170 |
+
info = gr.Accordion(f"ๆฐ่ฏ", open=False)
|
171 |
+
with info:
|
172 |
+
gr.Markdown(f"ๆฐ่ฏ")
|
173 |
+
|
174 |
+
dataframe_header = ["ๅ่ฏ", "ไธญๆ่ฏๆ", "่ฑๅผ้ณๆ ", "็พๅผ้ณๆ ", "่ฎฐๅฟ้"]
|
175 |
+
memorizing_dataframe = gr.Dataframe(
|
176 |
+
headers=dataframe_header,
|
177 |
+
datatype=["str"] * len(dataframe_header),
|
178 |
+
col_count=(len(dataframe_header), "fixed"),
|
179 |
+
wrap=True,
|
180 |
+
)
|
181 |
+
batches = gr.State(value=[])
|
182 |
+
current_batch_index = gr.State(value=-1)
|
183 |
+
user_book_id = gr.State(value=None)
|
184 |
+
with gr.Row():
|
185 |
+
# story = gr.HighlightedText([])
|
186 |
+
# translated_story = gr.HighlightedText([])
|
187 |
+
# story = gr.Textbox()
|
188 |
+
# translated_story = gr.Textbox()
|
189 |
+
story = gr.Markdown()
|
190 |
+
translated_story = gr.Markdown()
|
191 |
+
# ่ฏไบไธไธ๏ผ่ฟๆฏ markdown ็ๆพ็คบๆๆๅฅฝ
|
192 |
+
|
193 |
+
memorize_action = gr.CheckboxGroup(choices=[], label="่ฎฐไฝ็ๅ่ฏ", info="่ฝๅคๅค่ฟฐๅบๆๆๆ็ฎ่ฎฐไฝ")
|
194 |
+
with gr.Row():
|
195 |
+
previous_batch_btn = gr.Button("ไธไธๆน")
|
196 |
+
regenerate_btn = gr.Button("้ๆฐ็ๆๆ
ไบ")
|
197 |
+
next_batch_btn = gr.Button("ไธไธๆน", variant="primary")
|
198 |
+
progress = gr.Slider(1, 1, value=1, step=1, label="่ฟๅบฆ", info="")
|
199 |
+
|
200 |
+
def on_select_user(user):
|
201 |
+
user_id = user.get("id", None)
|
202 |
+
if user_id is None:
|
203 |
+
gr.Error("่ฏทๅ
็ปๅฝ")
|
204 |
+
return gr.Dropdown(choices=[])
|
205 |
+
new_options = []
|
206 |
+
user_book = get_user_books_by_owner_id(db, user_id)
|
207 |
+
new_options = [f"{book.title} | {book.batch_size}ไธชๅ่ฏไธ็ป [{book.id}]" for book in user_book]
|
208 |
+
return gr.Dropdown(choices=new_options)
|
209 |
+
|
210 |
+
def update_from_batch(memorizing_batch: UserMemoryBatch):
|
211 |
+
new_options = []
|
212 |
+
word_df = []
|
213 |
+
# logger.debug(get_user_memory_batch(db, memorizing_batch.id))
|
214 |
+
# logger.debug(memorizing_batch.id)
|
215 |
+
# logger.debug(get_user_memory_words_by_batch_id(db, memorizing_batch.id))
|
216 |
+
# logger.debug(get_words_by_ids(db, [w.word_id for w in get_user_memory_words_by_batch_id(db, memorizing_batch.id)]))
|
217 |
+
# words = get_words_in_batch(db, memorizing_batch.id)
|
218 |
+
# words = get_words_by_ids(db, [w.word_id for w in memorizing_words])
|
219 |
+
memorizing_words = get_user_memory_words_by_batch_id(db, memorizing_batch.id)
|
220 |
+
words = get_words_by_ids(db, [w.word_id for w in memorizing_words])
|
221 |
+
# ็ป่ฎก่ฎฐๅฟ้
|
222 |
+
actions = get_actions_at_each_word(db, [w.word_id for w in memorizing_words])
|
223 |
+
remember_count = defaultdict(int)
|
224 |
+
forget_count = defaultdict(int)
|
225 |
+
for a in actions:
|
226 |
+
if a.action == "remember":
|
227 |
+
remember_count[a.word_id] += 1
|
228 |
+
else:
|
229 |
+
forget_count[a.word_id] += 1
|
230 |
+
# ็ป่ฎก่ฎฐๅฟๆ็
|
231 |
+
batch_actions = get_user_memory_batch_actions_by_user_memory_batch_id(db, memorizing_batch.id)
|
232 |
+
batch_actions.sort(key=lambda x: x.create_time)
|
233 |
+
start, end = None, None
|
234 |
+
total_duration = None
|
235 |
+
for a in batch_actions:
|
236 |
+
if a.action == "start":
|
237 |
+
start: datetime = a.create_time
|
238 |
+
elif a.action == "end":
|
239 |
+
end: datetime = a.create_time
|
240 |
+
if start is None:
|
241 |
+
continue
|
242 |
+
if total_duration is None:
|
243 |
+
total_duration = end - start
|
244 |
+
else:
|
245 |
+
total_duration += end - start
|
246 |
+
memory_speed = f"{memorizing_batch.batch_type}"
|
247 |
+
if total_duration is not None:
|
248 |
+
sec = total_duration.total_seconds()
|
249 |
+
minutes = sec / 60
|
250 |
+
memory_speed += f"๏ผๅฝๅๆนๆฌก่ฎฐๅฟๆ็ {len(memorizing_words) / minutes:.2f} ่ฏ/ๅ้๏ผ{minutes:.2f} ๅ้/ๆนๆฌก"
|
251 |
+
# ๅ่ฏไฟกๆฏ่กจๆ ผไธๅพ้
|
252 |
+
for w in words:
|
253 |
+
new_options.append(f"{w.vc_vocabulary}")
|
254 |
+
word_df.append([
|
255 |
+
w.vc_vocabulary, # ๅ่ฏ
|
256 |
+
w.vc_translation, # ไธญๆ่ฏๆ
|
257 |
+
w.vc_phonetic_uk, # ่ฑๅผ้ณๆ
|
258 |
+
w.vc_phonetic_us, # ็พๅผ้ณๆ
|
259 |
+
f"{remember_count[w.vc_id]} / {remember_count[w.vc_id] + forget_count[w.vc_id]}", # ่ฎฐๅฟ้
|
260 |
+
])
|
261 |
+
df = pd.DataFrame(word_df, columns=dataframe_header)
|
262 |
+
if memorizing_batch.batch_type == "ๅๅฟ":
|
263 |
+
df = pd.DataFrame([[row[0], "", row[2], row[3], row[4]] for row in word_df], columns=dataframe_header)
|
264 |
+
# ๆ
ไบ
|
265 |
+
story = memorizing_batch.story
|
266 |
+
translated_story = memorizing_batch.translated_story
|
267 |
+
if len(story) == 0 or len(translated_story) == 0:
|
268 |
+
story, translated_story = regenerate_for_batch(memorizing_batch, words)
|
269 |
+
|
270 |
+
logger.info("่ฎก็ฎๆนๆฌกไฟกๆฏ")
|
271 |
+
logger.info(new_options)
|
272 |
+
logger.info(story)
|
273 |
+
logger.info(translated_story)
|
274 |
+
logger.info("=" * 8)
|
275 |
+
return (gr.Accordion(label=memory_speed), df, story, translated_story, gr.CheckboxGroup(choices=new_options))
|
276 |
+
|
277 |
+
def on_select_user_book(user_book_id: str):
|
278 |
+
"""
|
279 |
+
1. ๅฝๅๅ่ฏ
|
280 |
+
2. ๅฏนๅฝๅๅ่ฏ็ๆไฝ
|
281 |
+
3. ๆ
ไบ
|
282 |
+
"""
|
283 |
+
logger.debug(f'user_book {user_book_id}')
|
284 |
+
if user_book_id is None:
|
285 |
+
# ไธบไปไนไผ็ฉบ๏ผ่ฟ้่ฟๅ็ไธ่ฅฟๅฏ่ฝไผ็็ธ๏ผไฝๅฅฝๅๆง่กไธๅฐ่ฟ้
|
286 |
+
# ไธ็ฎกไบ๏ผๆพไธชๅ็คบ็ๅจ่ฟ้๏ผๅคงๅฎถ็่ง่ฟไธชๅ่ฏท็ป็่ตฐ
|
287 |
+
return [], gr.CheckboxGroup(choices=[])
|
288 |
+
user_book_id: str = user_book_id.split(" [")[1][:-1]
|
289 |
+
user_book = get_user_book(db, user_book_id)
|
290 |
+
batches = get_new_user_memory_batches_by_user_book_id(db, user_book_id) # ๅช็ผๅญๆฐ่ฏ
|
291 |
+
batch_id = user_book.memorizing_batch
|
292 |
+
memorizing_batch = get_user_memory_batch(db, batch_id)
|
293 |
+
current_batch_index = -1
|
294 |
+
if memorizing_batch is not None:
|
295 |
+
for index, b in enumerate(batches):
|
296 |
+
if b.id == memorizing_batch.id:
|
297 |
+
current_batch_index = index
|
298 |
+
break
|
299 |
+
if current_batch_index == -1:
|
300 |
+
# ๅฝๅ่ฟๆฒกๅผๅง่ฎฐๅฟ๏ผๆ่
ๅฝๅๆนๆฌกไธๆฏๆฐ่ฏๆนๆฌก
|
301 |
+
current_batch_index = 0
|
302 |
+
memorizing_batch = batches[0]
|
303 |
+
batch_id = memorizing_batch.id
|
304 |
+
user_book.memorizing_batch = batch_id
|
305 |
+
update_user_book(db, user_book_id, UserBookUpdate(
|
306 |
+
owner_id=user_book.owner_id,
|
307 |
+
book_id=user_book.book_id,
|
308 |
+
title=user_book.title,
|
309 |
+
random=user_book.random,
|
310 |
+
batch_size=user_book.batch_size,
|
311 |
+
memorizing_batch=batch_id
|
312 |
+
))
|
313 |
+
updates = update_from_batch(memorizing_batch)
|
314 |
+
on_batch_start(db, memorizing_batch.id)
|
315 |
+
asyncio.run(pregenerate(batches, current_batch_index))
|
316 |
+
return (batches, current_batch_index, user_book) + updates + (
|
317 |
+
gr.Slider(
|
318 |
+
minimum=1,
|
319 |
+
maximum=len(batches),
|
320 |
+
value=current_batch_index,
|
321 |
+
),)
|
322 |
+
|
323 |
+
batch_widget = [info, memorizing_dataframe, story, translated_story, memorize_action]
|
324 |
+
tab3.select(on_select_user, inputs=[user], outputs=[select_user_book])
|
325 |
+
select_user_book.select(
|
326 |
+
on_select_user_book,
|
327 |
+
inputs=[select_user_book],
|
328 |
+
outputs=[batches, current_batch_index, user_book_id] + batch_widget + [progress]
|
329 |
+
)
|
330 |
+
async def worker_regenerate_for_batch(batches: List[UserMemoryBatch], index: int):
|
331 |
+
started_at = time.monotonic()
|
332 |
+
logger.info(f"started {index}")
|
333 |
+
# start
|
334 |
+
batch = batches[index]
|
335 |
+
story = batch.story
|
336 |
+
translated_story = batch.translated_story
|
337 |
+
if len(story) == 0 or len(translated_story) == 0:
|
338 |
+
batch_words = get_words_in_batch(db, batch.id)
|
339 |
+
regenerate_for_batch(batch, batch_words)
|
340 |
+
# end
|
341 |
+
total = time.monotonic() - started_at
|
342 |
+
logger.info(f'completed in {total:.2f} seconds')
|
343 |
+
|
344 |
+
async def pregenerate(batches: List[UserMemoryBatch], current_batch_index: int):
|
345 |
+
logger.info("ๅผๅง้ข็ๆๆ
ไบ")
|
346 |
+
indexes = [current_batch_index+i+1 for i in range(3)]+[current_batch_index-i-1 for i in range(3)]
|
347 |
+
indexes = [i for i in indexes if 0 <= i < len(batches)]
|
348 |
+
for index in indexes:
|
349 |
+
asyncio.ensure_future(worker_regenerate_for_batch(batches, index))
|
350 |
+
logger.info("็ปๆ้ข็ๆๆ
ไบ")
|
351 |
+
|
352 | +        def submit_batch(batches: List[UserMemoryBatch], current_batch_index: int):
353 | +            memorizing_batch = batches[current_batch_index]
354 | +            return set_memorizing_batch(batches, current_batch_index, memorizing_batch)
355 | +
356 | +        def set_memorizing_batch(batches: List[UserMemoryBatch], current_batch_index: int, memorizing_batch: UserMemoryBatch):
357 | +            updates = update_from_batch(memorizing_batch)
358 | +            asyncio.run(pregenerate(batches, current_batch_index))
359 | +            logger.info("pregenerated")
360 | +            return updates + (gr.Slider(value=current_batch_index+1), current_batch_index)
361 | +
362 | +        def save_progress(old_batch: UserMemoryBatch, memorize_action: List[str]):
363 | +            # save the word memorization progress
364 | +            actions = []
365 | +            words = get_words_in_batch(db, old_batch.id)
366 | +            for word in words:
367 | +                if word.vc_vocabulary in memorize_action:
368 | +                    actions.append((word.vc_id, "remember"))
369 | +                else:
370 | +                    actions.append((word.vc_id, "forget"))
371 | +            save_memorizing_word_action(db, old_batch.id, actions)
372 | +
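save_progress turns the CheckboxGroup state into (word_id, action) pairs: every word whose spelling is ticked is recorded as "remember", everything else as "forget". The same mapping with stand-in data:

    # (vc_id, vc_vocabulary) pairs standing in for the ORM Word rows
    words = [("w1", "apple"), ("w2", "banana"), ("w3", "cherry")]
    memorize_action = ["apple", "cherry"]  # ticked checkboxes

    actions = [
        (vc_id, "remember" if vocab in memorize_action else "forget")
        for vc_id, vocab in words
    ]
    print(actions)  # [('w1', 'remember'), ('w2', 'forget'), ('w3', 'remember')]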
373 | +        def previous_batch(batches: List[UserMemoryBatch], current_batch_index: int, user_book: schema.UserBook, memorize_action: List[str]):
374 | +            old_index = current_batch_index
375 | +            if current_batch_index <= 0:
376 | +                current_batch_index = 0
377 | +            elif current_batch_index > 0:
378 | +                current_batch_index -= 1
379 | +            if current_batch_index != old_index:
380 | +                # memorization progress must be saved before paging back
381 | +                # logger.info("memorization progress must be saved before paging back")
382 | +                # logger.info(memorize_action)
383 | +                # save the batch progress
384 | +                old_batch = batches[old_index]
385 | +                current_batch = batches[current_batch_index]
386 | +                save_progress(old_batch, memorize_action)
387 | +                on_batch_end(db, old_batch.id)
388 | +                on_batch_start(db, current_batch.id)
389 | +                user_book_id = user_book.id
390 | +                update_user_book_memorizing_batch(db, user_book_id, current_batch.id)
391 | +            return submit_batch(batches, current_batch_index)
392 | +
393 | +        def next_batch(batches: List[UserMemoryBatch], current_batch_index: int, user_book: schema.UserBook, memorize_action: List[str]):
394 | +            old_index = current_batch_index
395 | +            if current_batch_index >= len(batches)-1:
396 | +                current_batch_index = len(batches)-1
397 | +            elif current_batch_index < len(batches) - 1:
398 | +                current_batch_index += 1
399 | +            if current_batch_index != old_index:
400 | +                # memorization progress must be saved before paging forward
401 | +                # logger.info("memorization progress must be saved before paging forward")
402 | +                # logger.info(memorize_action)
403 | +                # save the batch progress
404 | +                old_batch = batches[old_index]
405 | +                memorizing_batch = get_user_memory_batch(db, user_book.memorizing_batch)
406 | +                if memorizing_batch is not None:
407 | +                    old_batch = memorizing_batch
408 | +                current_batch = batches[current_batch_index]
409 | +                save_progress(old_batch, memorize_action)
410 | +                on_batch_end(db, old_batch.id)
411 | +                next_batch = generate_next_batch(db, user_book, minutes=60, k=3)
412 | +                if next_batch is not None:
413 | +                    current_batch = next_batch
414 | +                on_batch_start(db, current_batch.id)
415 | +                user_book_id = user_book.id
416 | +                update_user_book_memorizing_batch(db, user_book_id, current_batch.id)
417 | +                if next_batch is not None:
418 | +                    return set_memorizing_batch(batches, old_index, current_batch)
419 | +                else:
420 | +                    return set_memorizing_batch(batches, current_batch_index, current_batch)
421 | +            else:
422 | +                memorizing_batch = get_user_memory_batch(db, user_book.memorizing_batch)
423 | +                current_batch = batches[current_batch_index]
424 | +                save_progress(memorizing_batch, memorize_action)
425 | +                on_batch_end(db, memorizing_batch.id)
426 | +                next_batch = generate_next_batch(db, user_book, minutes=60, k=3)
427 | +                if next_batch is not None:
428 | +                    current_batch = next_batch
429 | +                on_batch_start(db, current_batch.id)
430 | +                user_book_id = user_book.id
431 | +                update_user_book_memorizing_batch(db, user_book_id, current_batch.id)
432 | +                if next_batch is not None:
433 | +                    return set_memorizing_batch(batches, old_index, current_batch)
434 | +                else:
435 | +                    return set_memorizing_batch(batches, current_batch_index, current_batch)
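The cursor movement in previous_batch and next_batch is written as paired if/elif branches; they reduce to a clamped decrement and increment. A tiny equivalent sketch:

    batches = list(range(5))  # stand-in batch list
    current_batch_index = 0
    current_batch_index = max(current_batch_index - 1, 0)                 # previous_batch: stays at 0
    current_batch_index = min(current_batch_index + 1, len(batches) - 1)  # next_batch: moves to 1

Incidentally, inside next_batch the local variable next_batch shadows the function's own name; harmless as written, since the handler never calls itself, but easy to trip over when refactoring.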
436 | +        previous_batch_btn.click(
437 | +            previous_batch,
438 | +            inputs=[batches, current_batch_index, user_book_id, memorize_action],
439 | +            outputs=batch_widget + [progress, current_batch_index]
440 | +        )
441 | +        next_batch_btn.click(
442 | +            next_batch,
443 | +            inputs=[batches, current_batch_index, user_book_id, memorize_action],
444 | +            outputs=batch_widget + [progress, current_batch_index]
445 | +        )
446 | +
447 | +        def regenerate_for_batch(memorizing_batch: UserMemoryBatch, batch_words: List[Word]):
448 | +            batch_words_str_list = [word.vc_vocabulary for word in batch_words]
449 | +            logger.info(f"Generating a story for {batch_words_str_list}")
450 | +            story, translated_story = generate_story_and_translated_story(batch_words_str_list)
451 | +            memorizing_batch.story = story
452 | +            memorizing_batch.translated_story = translated_story
453 | +            db.commit()
454 | +            db.refresh(memorizing_batch)
455 | +            create_user_memory_batch_generation_history(db, UserMemoryBatchGenerationHistoryCreate(
456 | +                batch_id=memorizing_batch.id,
457 | +                story=story,
458 | +                translated_story=translated_story
459 | +            ))
460 | +            logger.info(story)
461 | +            logger.info(translated_story)
462 | +            return story, translated_story
463 | +
464 | +        def regenerate(batches: List[UserMemoryBatch], current_batch_index: int):
465 | +            # regenerate the story
466 | +            memorizing_batch = batches[current_batch_index]
467 | +            batch_words = get_words_in_batch(db, memorizing_batch.id)
468 | +            story, translated_story = regenerate_for_batch(memorizing_batch, batch_words)
469 | +            return story, translated_story
470 | +        regenerate_btn.click(regenerate, inputs=[batches, current_batch_index], outputs=[story, translated_story])
471 | +
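regenerate_for_batch assumes only a narrow contract from generate_story_and_translated_story (imported from story_agent.py): a list of vocabulary strings in, an (English story, Chinese translation) pair out. A stub honouring that contract, handy for exercising the UI without an LLM call (the stub body is invented):

    from typing import List, Tuple

    def fake_generate_story_and_translated_story(words: List[str]) -> Tuple[str, str]:
        # deterministic stand-in for the LLM-backed generator
        story = "A short tale featuring " + ", ".join(words) + "."
        translated_story = "(Chinese translation of the story)"
        return story, translated_story

    story, translated = fake_generate_story_and_translated_story(["apple", "river"])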
472 | +    # 4. Create a wordbook from a memorization plan
473 | +    with gr.Tab("Create wordbook from memorization plan") as tab4:
474 | +        select_user_book = gr.Dropdown(
475 | +            [], label="Memorization plan", info="Please select a memorization plan"
476 | +        )
477 | +        word_count = gr.Number(value=0, label="Number of words")
478 | +        known_words = gr.CheckboxGroup(
479 | +            [], label="Words already learned", info="Check the words you already know; they will not be included in the new wordbook"
480 | +        )
481 | +        title = gr.TextArea(value='Wordbook', lines=1, label="Wordbook name")
482 | +        btn = gr.Button("Create wordbook from memorization plan")
483 | +        status = gr.Textbox("", lines=1, label="Status")
484 | +
485 | +        def on_select_user(user):
486 | +            user_id = user.get("id", None)
487 | +            if user_id is None:
488 | +                gr.Error("Please log in first")
489 | +                return gr.Dropdown(choices=[])
490 | +            new_options = []
491 | +            user_book = get_user_books_by_owner_id(db, user_id)
492 | +            new_options = [f"{book.title} | {book.batch_size} words per batch [{book.id}]" for book in user_book]
493 | +            return gr.Dropdown(choices=new_options)
494 | +
495 | +        def on_select_user_book(user_book):
496 | +            logger.debug(f'user_book {user_book}')
497 | +            if user_book is None:
498 | +                return 0, gr.CheckboxGroup(choices=[])
499 | +            new_options = []
500 | +            user_book_id = user_book.split(" [")[1][:-1]
501 | +            words = get_words_in_user_book(db, user_book_id)
502 | +            new_options = [f"{word.vc_vocabulary}" for word in words]
503 | +            return len(words), gr.CheckboxGroup(choices=new_options)
504 | +
505 | +        tab4.select(on_select_user, inputs=[user], outputs=[select_user_book])
506 | +        select_user_book.select(on_select_user_book, inputs=[select_user_book], outputs=[word_count, known_words])
507 | +
508 | +        def submit(user, user_book, known_words, title):
509 | +            user_id = user.get("id", None)
510 | +            if user_id is None:
511 | +                gr.Error("Please log in first")
512 | +                return "Please log in first"
513 | +            user_book_id = user_book.split(" [")[1][:-1]
514 | +            all_words = get_words_in_user_book(db, user_book_id)
515 | +            unknown_words = []
516 | +            for w in all_words:
517 | +                if w.vc_vocabulary not in known_words:
518 | +                    unknown_words.append(w)
519 | +            # all_words = get_words_by_vocabulary(db, known_words)
520 | +            book = save_words_as_book(db, user_id, unknown_words, title)
521 | +            if book is not None:
522 | +                return f"Successfully created a wordbook: {book.bk_name}"
523 | +            else:
524 | +                return "Failed"
525 | +
526 | +        btn.click(submit, [user, select_user_book, known_words, title], [status])
527 | +
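submit inverts the checkbox selection: the ticked words are the known ones, so the new wordbook keeps only the complement. The filtering step in isolation (the SimpleNamespace rows mimic just the vc_vocabulary attribute of the ORM model):

    from types import SimpleNamespace

    all_words = [SimpleNamespace(vc_vocabulary=v) for v in ("apple", "banana", "cherry")]
    known_words = ["banana"]  # CheckboxGroup value

    unknown_words = [w for w in all_words if w.vc_vocabulary not in known_words]
    print([w.vc_vocabulary for w in unknown_words])  # ['apple', 'cherry']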
528 | +    # 5. Statistics
529 | +    with gr.Tab("Statistics") as tab5:
530 | +        # 5.1. Story generation history
531 | +        with gr.Tab("AI history") as tab51:
532 | +            select_user_book = gr.Dropdown(
533 | +                [], label="Memorization plan", info="Please select a memorization plan"
534 | +            )
535 | +
536 | +            history_header = ["Words", "Story", "Chinese story", "Generated at"]
537 | +            history_dataframe = gr.Dataframe(
538 | +                headers=history_header,
539 | +                datatype=["str"] * len(history_header),
540 | +                col_count=(len(history_header), "fixed"),
541 | +                wrap=True,
542 | +                min_width=320,
543 | +                height=800,
544 | +            )
545 | +
546 | +            def on_select_user(user):
547 | +                user_id = user.get("id", None)
548 | +                if user_id is None:
549 | +                    gr.Error("Please log in first")
550 | +                    return gr.Dropdown(choices=[])
551 | +                new_options = []
552 | +                user_book = get_user_books_by_owner_id(db, user_id)
553 | +                new_options = [f"{book.title} | {book.batch_size} words per batch [{book.id}]" for book in user_book]
554 | +                return gr.Dropdown(choices=new_options)
555 | +
556 | +            def on_select_user_book(user_book_id):
557 | +                logger.debug(f'user_book {user_book_id}')
558 | +                if user_book_id is None:
559 | +                    return 0, gr.CheckboxGroup(choices=[])
560 | +                user_book_id = user_book_id.split(" [")[1][:-1]
561 | +                batch_id_to_words_and_history = get_generation_hostorys_by_user_book_id(db, user_book_id)
562 | +                data = []
563 | +                for batch_id, (words, histories) in batch_id_to_words_and_history.items():
564 | +                    for history in histories:
565 | +                        word = ", ".join([w.vc_vocabulary for w in words])
566 | +                        story = history.story
567 | +                        translated_story = history.translated_story
568 | +                        create_time = history.create_time
569 | +                        data.append([word, story, translated_story, create_time])
570 | +                df = pd.DataFrame(data, columns=history_header)
571 | +                return df
572 | +
573 | +            tab51.select(on_select_user, inputs=[user], outputs=[select_user_book])
574 | +            select_user_book.select(on_select_user_book, inputs=[select_user_book], outputs=[history_dataframe])
575 | +
576 | +        # 5.2. Memorization history
577 | +        with gr.Tab("Memorization history") as tab52:
578 | +            select_user_book = gr.Dropdown(
579 | +                [], label="Memorization plan", info="Please select a memorization plan"
580 | +            )
581 | +
582 | +            batch_history_header = ["Words", "Story", "Chinese story", "Batch type", "Memorization status", "Generated at"]
583 | +            batch_history_dataframe = gr.Dataframe(
584 | +                headers=batch_history_header,
585 | +                datatype=["str"] * len(batch_history_header),
586 | +                col_count=(len(batch_history_header), "fixed"),
587 | +                wrap=True,
588 | +                min_width=320,
589 | +                height=800,
590 | +            )
591 | +
592 | +            def on_select_user(user):
593 | +                user_id = user.get("id", None)
594 | +                if user_id is None:
595 | +                    gr.Error("Please log in first")
596 | +                    return gr.Dropdown(choices=[])
597 | +                new_options = []
598 | +                user_book = get_user_books_by_owner_id(db, user_id)
599 | +                new_options = [f"{book.title} | {book.batch_size} words per batch [{book.id}]" for book in user_book]
600 | +                return gr.Dropdown(choices=new_options)
601 | +
602 | +            def on_select_user_book(user_book_id):
603 | +                logger.debug(f'user_book {user_book_id}')
604 | +                if user_book_id is None:
605 | +                    return 0, gr.CheckboxGroup(choices=[])
606 | +                user_book_id = user_book_id.split(" [")[1][:-1]
607 | +                actions, batch_id_to_batch, batch_id_to_words, batch_id_to_actions = get_user_memory_batch_history(db, user_book_id)
608 | +                data = []
609 | +                for action in actions:
610 | +                    batch_id = action.batch_id
611 | +
612 | +                    words = batch_id_to_words[batch_id]
613 | +                    word = ", ".join([w.vc_vocabulary for w in words])
614 | +
615 | +                    batch = batch_id_to_batch[batch_id]
616 | +                    story = batch.story
617 | +                    translated_story = batch.translated_story
618 | +                    batch_type = batch.batch_type
619 | +
620 | +                    memory_actions = batch_id_to_actions.get(batch_id, [])
621 | +                    remember_word_ids = {a.word_id for a in memory_actions if a.action == "remember"}
622 | +                    remember_words = []
623 | +                    forget_words = []
624 | +                    for w in words:
625 | +                        if w.vc_id in remember_word_ids:
626 | +                            remember_words.append(w.vc_vocabulary)
627 | +                        else:
628 | +                            forget_words.append(w.vc_vocabulary)
629 | +                    memory_status = f"Remembered {len(remember_words)} words, forgot {len(forget_words)} words"
630 | +                    memory_status += f"; remembered: {', '.join(remember_words)}"
631 | +                    memory_status += f"; forgotten: {', '.join(forget_words)}"
632 | +
633 | +                    create_time = action.create_time
634 | +
635 | +                    data.append([word, story, translated_story, batch_type, memory_status, create_time])
636 | +                df = pd.DataFrame(data, columns=batch_history_header)
637 | +                return df
638 | +
639 | +            tab52.select(on_select_user, inputs=[user], outputs=[select_user_book])
640 | +            select_user_book.select(on_select_user_book, inputs=[select_user_book], outputs=[batch_history_dataframe])
641 | +
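The tab52 handler partitions each batch's words by whether a "remember" action was recorded for them, then renders a one-line summary string. The same partition-and-format step with stand-in rows:

    from types import SimpleNamespace

    words = [SimpleNamespace(vc_id=i, vc_vocabulary=v) for i, v in enumerate(["apple", "banana"])]
    memory_actions = [SimpleNamespace(word_id=0, action="remember")]

    remember_word_ids = {a.word_id for a in memory_actions if a.action == "remember"}
    remember_words = [w.vc_vocabulary for w in words if w.vc_id in remember_word_ids]
    forget_words = [w.vc_vocabulary for w in words if w.vc_id not in remember_word_ids]
    memory_status = (
        f"Remembered {len(remember_words)} words, forgot {len(forget_words)} words"
        f"; remembered: {', '.join(remember_words)}"
        f"; forgotten: {', '.join(forget_words)}"
    )
    print(memory_status)  # Remembered 1 words, forgot 1 words; remembered: apple; forgotten: banana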
642 | +
643 | +    on_login_success_ui = [email, password, login_btn, register_email, register_password, register_btn]
644 | +    on_login_success_ui += [tab1]
645 | +
646 | +    def on_login(login_success):
647 | +        return (
648 | +            gr.TextArea(visible=not login_success),
649 | +            gr.TextArea(visible=not login_success),
650 | +            gr.Button(visible=not login_success),
651 | +            gr.TextArea(visible=not login_success),
652 | +            gr.TextArea(visible=not login_success),
653 | +            gr.Button(visible=not login_success),
654 | +            # gr.Accordion(visible=not login_success),
655 | +        ) + (
656 | +            gr.Tab(visible=login_success),
657 | +        )
658 | +    def login(email, password):
659 | +        user = get_user_by_email(db, email)
660 | +        if password is None or len(password) == 0:
661 | +            return {
662 | +                "id": "",
663 | +                "email": "",
664 | +            }, "Login failed", *on_login(False)
665 | +        if user is None or not user.verify_password(password):
666 | +            return {
667 | +                "id": "",
668 | +                "email": "",
669 | +            }, "Login failed", *on_login(False)
670 | +        return {
671 | +            "id": user.id,
672 | +            "email": user.email,
673 | +        }, "Login succeeded", *on_login(True)
674 | +    login_btn.click(login, [email, password], [user, user_status] + on_login_success_ui)
675 | +    def register(email, password):
676 | +        user = get_user_by_email(db, email)
677 | +        if user is not None:
678 | +            return {
679 | +                "id": "",
680 | +                "email": "",
681 | +            }, "Registration failed: this email is already registered", *on_login(False)
682 | +        else:
683 | +            user = create_user(db, email=email, password=password)
684 | +            return {
685 | +                "id": user.id,
686 | +                "email": user.email,
687 | +            }, "Registered and logged in", *on_login(True)
688 | +    register_btn.click(register, [register_email, register_password], [user, user_status] + on_login_success_ui)
689 | +
690 | +
691 | +if __name__ == "__main__":
692 | +    # import os
693 | +    # os.environ["no_proxy"] = "localhost,127.0.0.1,::1"
694 | +    # demo.launch(server_name="127.0.0.1", server_port=8090, debug=True)
695 | +    logger.add(f"output/logs/web_{date_str}.log", rotation="1 day", retention="7 days", level="INFO")
696 | +    demo.launch()