Spaces:
Running
Running
import lang | |
# import requests | |
import json | |
import httpx | |
#import aiofiles | |
from imgsearch import search_img | |
from re import match, sub | |
from env import HOST_URL | |
from PIL import Image | |
from env import LLM_API_KEY | |
def remove_numbering(text):
    """Strip list-style numbering markers such as ``1.`` or ``12. `` from text.

    Every run of digits that is followed by a dot and at most one whitespace
    character is deleted, wherever it occurs in the string.

    Args:
        text: The string to clean.

    Returns:
        A copy of ``text`` with all such markers removed.
    """
    # One or more digits, a literal dot, then zero or one whitespace character.
    return sub(r"\d+\.\s?", "", text)
async def tldr(content, l=lang.VI_VN):
    """Ask the hosted LLM for a short summary ("tl;dr") of ``content``.

    Sends a single chat-completion request to the api.awanllm.com endpoint
    and returns the text of the first choice. When the reply spans several
    lines, everything up to and including the first newline is discarded;
    a single-line reply is returned whole.

    Args:
        content: The text to summarise.
        l: Language the summary is requested in; interpolated into the
            prompt. Defaults to ``lang.VI_VN``.

    Returns:
        The summary text with surrounding whitespace stripped.

    Raises:
        KeyError, IndexError: If the response JSON does not contain
            ``choices[0].message.content``.
    """
    payload = {
        "model": "Meta-Llama-3-8B-Instruct",
        "messages": [
            {"role": "user", "content": f"tl;dr in {l}: {content}"}
        ],
        "presence_penalty": 0.3,
        "temperature": 0.55,
    }
    headers = {
        'Content-Type': 'application/json',
        'Authorization': f'Bearer {LLM_API_KEY}',
    }
    async with httpx.AsyncClient() as client:
        # `json=` lets httpx serialise the body itself; passing a raw string
        # through `data=` is deprecated (that parameter is for form fields).
        # timeout=None disables httpx's default timeouts for this request.
        response = await client.post(
            url="https://api.awanllm.com/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=None,
        )
    reply = response.json()['choices'][0]['message']['content']
    # Keep only what follows the first newline (the whole reply if there is none).
    return reply.split('\n', 1)[-1].strip()
# import io | |
# import asyncio | |
# from functools import partial | |
# def optimize_image(data, _fn): | |
# with Image.open(io.BytesIO(data)) as img: | |
# img.save(f'{_fn}.webp', optimize=True) | |
# async def download_img(_url, _prefix=HOST_URL): | |
# _r = None | |
# async with httpx.AsyncClient() as client: | |
# _r = await client.get(url=_url, timeout=None) | |
# _fn = f'img_{str(hash(_url))}' | |
# if _r.status_code//100 == 2: | |
# _loop = asyncio.get_running_loop() | |
# _out = await _loop.run_in_executor(None, partial(optimize_image, _r.content, _fn)) | |
# return f"{_prefix}/images/{_fn}.webp" | |
async def fetch_img_for_words(words: list[str], __url_prefix=None):
    """Pair each word with the result of ``search_img`` for that word.

    Args:
        words: The words to look up.
        __url_prefix: Accepted but not used by this function.

    Returns:
        A list of ``(word, image)`` tuples in the order of ``words``, where
        ``image`` is whatever ``search_img`` returned for that word.
    """
    print("fetching images...")
    # Look up every word first, then pair the results back with the words.
    images = []
    for word in words:
        images.append(search_img(word))
    return list(zip(words, images))
def _parse_definition_lines(lines):
    """Turn ``keyword: definition`` lines into ``(keyword, definition)`` pairs."""
    pairs = []
    for line in lines:
        # Split on the FIRST colon only, so a definition that itself contains
        # a colon is kept instead of being discarded.
        key, sep, value = line.partition(':')
        if not sep:
            continue  # no colon: not a "keyword: definition" line
        # Drop markdown emphasis asterisks and list numbering from the keyword.
        key = remove_numbering(key.replace('*', '').strip())
        value = value.strip()
        # NOTE(review): `key` never contains ':' here, so the "note: " test
        # cannot match; kept as-is to preserve existing results -- confirm
        # the intended filter before changing it.
        if value != '' and "note: " not in key.lower():
            pairs.append((key, value))
    return pairs


async def get_definitions_from_words(words: list[str], summary: str = "", lang: str = lang.VI_VN):
    """Ask the hosted LLM for short definitions of ``words``.

    Sends a single chat-completion request to the api.awanllm.com endpoint,
    splits the reply into lines and parses every ``keyword: definition``
    line. Lines without a colon or with an empty definition are skipped.

    Args:
        words: Keywords to define; joined with ``'; '`` in the prompt.
        summary: Text placed at the start of the prompt, before the
            instruction.
        lang: Language the definitions are requested in; interpolated into
            the prompt. Defaults to ``lang.VI_VN``.

    Returns:
        A list of ``(keyword, definition)`` tuples in reply order, with the
        last parsed pair removed.

    Raises:
        KeyError, IndexError: If the response JSON does not contain
            ``choices[0].message.content``.
    """
    print("running inferrence")
    prompt = f"{summary}. Give concise, easy-to-understand definitions for the following keywords: {'; '.join(words)}. DO NOT include the keywords inside their respective definition. Use {lang}."
    payload = {
        "model": "Meta-Llama-3-8B-Instruct",
        "messages": [
            {"role": "user", "content": prompt}
        ],
        "presence_penalty": 0.3,
        "temperature": 0.55,
    }
    headers = {
        'Content-Type': 'application/json',
        'Authorization': f'Bearer {LLM_API_KEY}',
    }
    async with httpx.AsyncClient() as client:
        # `json=` lets httpx serialise the body itself; passing a raw string
        # through `data=` is deprecated (that parameter is for form fields).
        # timeout=None disables httpx's default timeouts for this request.
        response = await client.post(
            url="https://api.awanllm.com/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=None,
        )
    # Decode the response once and reuse the resulting lines.
    lines = response.json()['choices'][0]['message']['content'].split('\n')
    print(lines)
    # NOTE(review): the final parsed pair is always discarded, as in the
    # original code -- presumably to drop a trailing remark from the model;
    # confirm, since it also drops a real definition when no remark follows.
    return _parse_definition_lines(lines)[:-1]
def get_imgs_from_words(words: list[str]):
    """Placeholder with no implementation: ignores ``words`` and returns ``None``."""
    return None
def classify_words(words: list[str], deep: bool = False):
    """Placeholder with no implementation: ignores its arguments and returns ``None``."""
    return None