Spaces:

nhathuy07
/

text2quiz_backend

Running

File size: 3,506 Bytes

dacef8c
 
 
d4c182e
7076d7a
dacef8c
 
d4c182e
8dd92a1
dacef8c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7076d7a
 
 
 
 
 
 
 
 
 
 
 
 
 
dacef8c
 
 
 
ca2193d
 
 
 
 
 
d4c182e
ca2193d
 
 
 
d4c182e
ca2193d
 
 
 
d4c182e
ca2193d
d4c182e
 
dacef8c
ca2193d
dacef8c
 
1b68a38
dacef8c
 
 
 
 
 
 
fbef45a
dacef8c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bd9e29c
dacef8c
 
 
 
 
 
 
 
 
19ac67f

import lang
import requests
import json
import httpx
#import aiofiles
from imgsearch import search_img
from re import match, sub
from env import HOST_URL
from PIL import Image
from env import LLM_API_KEY

def remove_numbering(text):
    """Strip list-style numbering markers (``1.``, ``2. `` ...) from a string.

    Every run of digits that is immediately followed by a dot is deleted,
    together with at most one whitespace character after that dot. The match
    is not anchored, so it applies anywhere in the string, not only at the
    start of a line.

    Args:
        text: The string to clean.

    Returns:
        A copy of ``text`` with all such markers removed.
    """
    # digits, a literal dot, then an optional single whitespace character
    return sub(pattern=r"\d+\.\s?", repl="", string=text)

async def tldr(content, l=lang.VI_VN):
    """Ask the AwanLLM chat-completions endpoint for a short summary.

    Sends a single user message of the form ``tl;dr in {l}: {content}`` to the
    ``Meta-Llama-3-8B-Instruct`` model and returns the cleaned-up reply.

    Args:
        content: The text to summarize.
        l: The language to request the summary in; interpolated verbatim
            into the prompt.

    Returns:
        The model's reply, stripped of surrounding whitespace. When the reply
        contains a newline, everything up to and including the first newline
        is discarded (presumably a preamble line -- confirm against real
        model output); a single-line reply is returned whole.

    Raises:
        KeyError, IndexError: If the response JSON does not contain
            ``choices[0].message.content``.
    """
    _payload = json.dumps({
        "model": "Meta-Llama-3-8B-Instruct",
        "messages": [
            {"role": "user", "content": f"tl;dr in {l}: {content}"}
        ],
        "presence_penalty": 0.3,
        "temperature": 0.55,
    })
    async with httpx.AsyncClient() as client:
        _r = await client.post(
            url="https://api.awanllm.com/v1/chat/completions",
            headers={'Content-Type': 'application/json', 'Authorization': f'Bearer {LLM_API_KEY}'},
            # The body is already serialized, so send it as raw content.
            content=_payload,
            # timeout belongs to the HTTP call, not to json.dumps (where it
            # raised TypeError). None disables httpx's default timeout.
            timeout=None,
        )
    _summary = _r.json()
    return _summary['choices'][0]['message']['content'].split('\n', 1)[-1].strip()

# import io
# import asyncio
# from functools import partial
# def optimize_image(data, _fn):
#     with Image.open(io.BytesIO(data)) as img:
#         img.save(f'{_fn}.webp', optimize=True)

# async def download_img(_url, _prefix=HOST_URL):
#     _r = None
#     async with httpx.AsyncClient() as client:
#         _r = await client.get(url=_url, timeout=None)

#     _fn = f'img_{str(hash(_url))}'
#     if _r.status_code//100 == 2:
#         _loop = asyncio.get_running_loop()
#         _out = await _loop.run_in_executor(None, partial(optimize_image, _r.content, _fn))
        
#     return f"{_prefix}/images/{_fn}.webp"

async def fetch_img_for_words(words: list[str], __url_prefix=None):
    """Look up an image for each word and pair every word with its result.

    Args:
        words: The words to search images for.
        __url_prefix: Not used by this function.

    Returns:
        A list of ``(word, image)`` tuples in the same order as ``words``,
        where ``image`` is whatever ``search_img`` returned for that word.
    """
    print("fetching images...")
    # Run every lookup first, then pair the results up with their words.
    found = list(map(search_img, words))
    return list(zip(words, found))

def _parse_definition_lines(text: str) -> list[tuple[str, str]]:
    """Extract ``(keyword, definition)`` pairs from the model's reply text."""
    pairs = []
    for line in text.split('\n'):
        # Split on the FIRST colon only, so a definition that itself contains
        # a colon is kept intact instead of being discarded.
        keyword, colon, definition = line.partition(':')
        if not colon:
            continue  # not a "keyword: definition" line
        # Drop markdown emphasis markers and any leading "1." style numbering.
        keyword = remove_numbering(keyword.replace('*', '').strip())
        definition = definition.strip()
        if not definition:
            continue  # e.g. a heading line that merely ends with a colon
        if keyword.lower() in ("note", "notes"):
            continue  # model commentary, not a definition
        pairs.append((keyword, definition))
    return pairs


async def get_definitions_from_words(words: list[str], summary: str = "", lang: str = lang.VI_VN):
    """Ask the AwanLLM chat-completions endpoint to define a list of keywords.

    The prompt consists of ``summary`` followed by an instruction to define
    the ``;``-joined keywords in the requested language. The reply is parsed
    line by line; each line of the form ``keyword: definition`` yields a pair.

    The request is made with ``httpx.AsyncClient`` so that awaiting this
    coroutine does not block the event loop while the model responds.

    Args:
        words: The keywords to define.
        summary: Context text placed at the start of the prompt.
        lang: The language the definitions are requested in; interpolated
            verbatim into the prompt.

    Returns:
        A list of ``(keyword, definition)`` tuples in the order they appear in
        the reply. Lines without a colon, lines with an empty definition, and
        ``Note:`` lines are skipped; every remaining definition is returned.

    Raises:
        KeyError, IndexError: If the response JSON does not contain
            ``choices[0].message.content``.
    """
    print("running inferrence")
    _payload = json.dumps({
        "model": "Meta-Llama-3-8B-Instruct",
        "messages": [
            {"role": "user", "content": f"{summary}. Give concise, easy-to-understand definitions for the following keywords: {'; '.join(words)}. DO NOT include the keywords inside their respective definition. Use {lang}."}
        ],
        "presence_penalty": 0.3,
        "temperature": 0.55,
    })
    async with httpx.AsyncClient() as client:
        _r = await client.post(
            url="https://api.awanllm.com/v1/chat/completions",
            headers={'Content-Type': 'application/json', 'Authorization': f'Bearer {LLM_API_KEY}'},
            content=_payload,
            # No timeout, matching the original requests call which had none.
            timeout=None,
        )

    # Parse the response body once and reuse it for logging and parsing.
    _content = _r.json()['choices'][0]['message']['content']
    print(_content.split('\n'))

    return _parse_definition_lines(_content)

def get_imgs_from_words(words: list[str]):
    """Placeholder: not implemented yet.

    Args:
        words: Accepted but currently ignored.

    Returns:
        Always ``None``.
    """
    return None

def classify_words(words: list[str], deep: bool = False):
    """Placeholder: not implemented yet.

    Args:
        words: Accepted but currently ignored.
        deep: Accepted but currently ignored.

    Returns:
        Always ``None``.
    """
    return None