import json
from re import match, sub

import httpx
from PIL import Image
# import requests
# import aiofiles

import lang
from env import HOST_URL
from env import LLM_API_KEY
from imgsearch import search_img

# Chat-completions endpoint and model shared by every LLM call in this module.
_LLM_API_URL = "https://api.awanllm.com/v1/chat/completions"
_LLM_MODEL = "Meta-Llama-3-8B-Instruct"

# One or more digits followed by a dot and an optional whitespace character.
_NUMBERING_PATTERN = r"\d+\.\s?"


def remove_numbering(text):
    """Removes list-like numberings (e.g., 1., 2. etc.) from a string.

    Every occurrence of "digits + dot + optional whitespace" is removed,
    wherever it appears in the string, not only at the start.

    Args:
        text: The string to process.

    Returns:
        The string with numberings removed.
    """
    return sub(_NUMBERING_PATTERN, "", text)


async def _chat_completion(prompt: str) -> str:
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        prompt: The full text of the user message.

    Returns:
        The ``content`` of the first choice in the JSON response.
    """
    payload = {
        "model": _LLM_MODEL,
        "messages": [
            {"role": "user", "content": prompt}
        ],
        "presence_penalty": 0.3,
        "temperature": 0.55,
    }
    async with httpx.AsyncClient() as client:
        response = await client.post(
            url=_LLM_API_URL,
            headers={'Content-Type': 'application/json',
                     'Authorization': f'Bearer {LLM_API_KEY}'},
            # The body is already serialised, so it is sent as raw content;
            # httpx deprecates passing a text body through ``data=``.
            content=json.dumps(payload),
            # NOTE(review): no timeout at all, so a stalled request blocks
            # the caller indefinitely. Kept as in the original.
            timeout=None,
        )
    # Parse the body once; the response has been fully read by this point.
    return response.json()['choices'][0]['message']['content']


async def tldr(content, l=lang.VI_VN):
    """Ask the LLM for a tl;dr of ``content`` written in language ``l``.

    Args:
        content: The text to summarise.
        l: Language inserted into the prompt (defaults to ``lang.VI_VN``).

    Returns:
        The reply with surrounding whitespace stripped. When the reply spans
        several lines, its first line is discarded.
    """
    reply = await _chat_completion(f"tl;dr in {l}: {content}")
    # split('\n', 1)[-1] keeps everything after the first newline, or the
    # whole reply when there is no newline.
    return reply.split('\n', 1)[-1].strip()


# import io
# import asyncio
# from functools import partial

# def optimize_image(data, _fn):
#     with Image.open(io.BytesIO(data)) as img:
#         img.save(f'{_fn}.webp', optimize=True)

# async def download_img(_url, _prefix=HOST_URL):
#     _r = None
#     async with httpx.AsyncClient() as client:
#         _r = await client.get(url=_url, timeout=None)
#     _fn = f'img_{str(hash(_url))}'
#     if _r.status_code//100 == 2:
#         _loop = asyncio.get_running_loop()
#         _out = await _loop.run_in_executor(None, partial(optimize_image, _r.content, _fn))
#     return f"{_prefix}/images/{_fn}.webp"


async def fetch_img_for_words(words: list[str], __url_prefix=None):
    """Pair every word with the result of ``search_img`` for that word.

    Args:
        words: The words to look up.
        __url_prefix: Unused; kept for interface compatibility.

    Returns:
        A list of ``(word, search_img(word))`` tuples in input order.
    """
    print("fetching images...")
    # NOTE(review): search_img is called synchronously inside a coroutine;
    # if it performs blocking I/O it will stall the event loop - confirm.
    images = [search_img(word) for word in words]
    return list(zip(words, images))


def _parse_definitions(lines):
    """Turn ``keyword: definition`` lines into ``(keyword, definition)`` tuples.

    Lines without a colon, and lines whose definition part is empty, are
    skipped. Asterisks and list numbering are removed from the keyword.
    """
    definitions = []
    for line in lines:
        # Split on the FIRST colon only, so a definition that itself contains
        # a colon is kept instead of being discarded.
        key, colon, value = line.partition(':')
        if not colon:
            continue
        key = remove_numbering(key.replace('*', '').strip())
        value = value.strip()
        # NOTE(review): the key is the text before the first colon, so it can
        # never contain "note: " and this filter does not match anything as
        # written. Left unchanged because the trailing-entry slice in
        # get_definitions_from_words may be compensating for it.
        if value != '' and "note: " not in key.lower():
            definitions.append((key, value))
    return definitions


async def get_definitions_from_words(words: list[str], summary: str = "", lang: str = lang.VI_VN):
    """Ask the LLM for short definitions of ``words``.

    Args:
        words: Keywords to define; joined with ``'; '`` in the prompt.
        summary: Text placed at the start of the prompt, before the request.
        lang: Language inserted into the prompt (defaults to ``lang.VI_VN``).
            Inside this function the name shadows the ``lang`` module.

    Returns:
        A list of ``(keyword, definition)`` tuples parsed from the reply,
        without the last parsed entry.
    """
    print("running inferrence")
    prompt = (
        f"{summary}. Give concise, easy-to-understand definitions for the "
        f"following keywords: {'; '.join(words)}. DO NOT include the keywords "
        f"inside their respective definition. Use {lang}."
    )
    reply_lines = (await _chat_completion(prompt)).split('\n')
    print(reply_lines)
    # NOTE(review): the last parsed entry is always dropped, presumably to
    # discard a closing remark from the model; when the reply has no such
    # remark this removes a real definition - confirm the intent.
    return _parse_definitions(reply_lines)[:-1]


def get_imgs_from_words(words: list[str]):
    """Placeholder: not implemented yet, always returns ``None``."""
    pass


def classify_words(words: list[str], deep: bool = False):
    """Placeholder: not implemented yet, always returns ``None``."""
    pass