Spaces:
Running
Running
File size: 3,633 Bytes
dacef8c f3ec82a dacef8c d4c182e 7076d7a dacef8c d4c182e 8dd92a1 dacef8c 7076d7a f3ec82a 7076d7a dacef8c ca2193d d4c182e ca2193d d4c182e ca2193d d4c182e ca2193d d4c182e dacef8c ca2193d dacef8c 1b68a38 dacef8c f3ec82a dacef8c bd9e29c dacef8c 19ac67f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 |
import lang
# import requests
import json
import httpx
#import aiofiles
from imgsearch import search_img
from re import match, sub
from env import HOST_URL
from PIL import Image
from env import LLM_API_KEY
def remove_numbering(text):
    """Strip list-style numbering (``1.``, ``2.`` ...) from the start of each line.

    Only a run of digits followed by a dot at the beginning of a line
    (optionally preceded by spaces or tabs) is treated as numbering. Numbers
    that appear inside the text (``Python 3.10``) or decimals that open a
    line (``1.5 liters``) are left untouched.

    Args:
        text: The string to process.

    Returns:
        The string with leading numberings removed.
    """
    # (?m)     -> ^ matches at the start of every line, not just the string.
    # [ \t]*   -> used instead of \s* so the pattern never swallows newlines.
    # (?!\d)   -> a dot followed by a digit is a decimal point, not numbering.
    pattern = r"(?m)^[ \t]*\d+\.(?!\d)[ \t]*"
    return sub(pattern, "", text)
async def tldr(content, l=lang.VI_VN):
    """Request a tl;dr of ``content`` from the chat-completions endpoint.

    Args:
        content: Text to summarise; interpolated into the prompt.
        l: Language value interpolated into the prompt. Defaults to
            ``lang.VI_VN``.

    Returns:
        The message content of the first choice, stripped of surrounding
        whitespace. When the content contains a newline, everything up to
        and including the first newline is discarded first.
    """
    request_headers = {
        'Content-Type': 'application/json',
        'Authorization': f'Bearer {LLM_API_KEY}',
    }
    user_message = {"role": "user", "content": f"tl;dr in {l}: {content}"}
    request_body = json.dumps({
        "model": "Meta-Llama-3-8B-Instruct",
        "messages": [user_message],
        "presence_penalty": 0.3,
        "temperature": 0.55,
    })

    async with httpx.AsyncClient() as http:
        response = await http.post(
            url="https://api.awanllm.com/v1/chat/completions",
            headers=request_headers,
            data=request_body,
            timeout=None,  # no client-side timeout is applied
        )
        payload = response.json()

    answer = payload['choices'][0]['message']['content']
    # Drop the first line when the answer spans several lines -- presumably a
    # preamble emitted by the model (TODO confirm); single-line answers are
    # returned whole.
    first_line, newline, remainder = answer.partition('\n')
    kept = remainder if newline else first_line
    return kept.strip()
# import io
# import asyncio
# from functools import partial
# def optimize_image(data, _fn):
# with Image.open(io.BytesIO(data)) as img:
# img.save(f'{_fn}.webp', optimize=True)
# async def download_img(_url, _prefix=HOST_URL):
# _r = None
# async with httpx.AsyncClient() as client:
# _r = await client.get(url=_url, timeout=None)
# _fn = f'img_{str(hash(_url))}'
# if _r.status_code//100 == 2:
# _loop = asyncio.get_running_loop()
# _out = await _loop.run_in_executor(None, partial(optimize_image, _r.content, _fn))
# return f"{_prefix}/images/{_fn}.webp"
async def fetch_img_for_words(words: list[str], __url_prefix=None):
    """Pair every word with the result of ``search_img`` for that word.

    ``search_img`` is called directly (not awaited) once per word, in order.

    Args:
        words: Words to look up.
        __url_prefix: Accepted but not used by this function.

    Returns:
        A list of ``(word, search_img(word))`` tuples in input order.
    """
    print("fetching images...")
    image_links = list(map(search_img, words))
    return list(zip(words, image_links))
def _parse_definitions(reply: str, words: list[str]):
    """Extract ``(keyword, definition)`` pairs from a line-oriented reply."""
    pairs = []
    for line in reply.split('\n'):
        # Split on the FIRST colon only, so a definition that itself contains
        # ':' is kept instead of being discarded by a failed unpack.
        key, colon, definition = line.partition(':')
        if not colon:
            continue  # not a "keyword: definition" line
        key = remove_numbering(key.replace('*', '').strip())
        definition = definition.strip()
        if definition:
            pairs.append((key, definition))

    # The previous implementation always dropped the last parsed entry --
    # presumably to get rid of a trailing remark such as "Note: ..." (TODO
    # confirm) -- which also threw away a genuine definition whenever no such
    # remark was present. Keep that safeguard, but only when the trailing key
    # is not one of the requested keywords.
    requested = {word.strip().casefold() for word in words}
    if pairs and pairs[-1][0].strip().casefold() not in requested:
        pairs.pop()
    return pairs


async def get_definitions_from_words(words: list[str], summary: str = "", lang: str = lang.VI_VN):
    """Ask the chat-completions endpoint to define ``words`` and parse the reply.

    Args:
        words: Keywords to define; joined with ``'; '`` inside the prompt.
        summary: Text placed at the start of the prompt.
        lang: Language value interpolated into the prompt. Inside this
            function the parameter shadows the ``lang`` module; the default
            ``lang.VI_VN`` is evaluated at definition time from the module.

    Returns:
        A list of ``(keyword, definition)`` tuples parsed from the reply
        lines of the form ``keyword: definition``. Markdown asterisks and
        list numbering are removed from the keyword. Lines without a colon
        or with an empty definition are skipped. The final entry is dropped
        when its keyword is not one of ``words``.

    Raises:
        KeyError, IndexError: If the JSON reply does not have the
            ``choices[0].message.content`` structure.
    """
    print("running inferrence")
    async with httpx.AsyncClient() as client:
        _r = await client.post(
            url="https://api.awanllm.com/v1/chat/completions",
            headers={'Content-Type': 'application/json', 'Authorization': f'Bearer {LLM_API_KEY}'},
            data=json.dumps({
                "model": "Meta-Llama-3-8B-Instruct",
                "messages": [
                    {"role": "user", "content": f"{summary}. Give concise, easy-to-understand definitions for the following keywords: {'; '.join(words)}. DO NOT include the keywords inside their respective definition. Use {lang}."}
                ],
                "presence_penalty": 0.3,
                "temperature": 0.55
            }),
            timeout=None  # no client-side timeout is applied
        )

    # Decode the body once and reuse it for both the debug print and parsing.
    reply = _r.json()['choices'][0]['message']['content']
    print(reply.split('\n'))
    return _parse_definitions(reply, words)
def get_imgs_from_words(words: list[str]):
    """Placeholder that is not implemented yet.

    Args:
        words: Currently ignored.

    Returns:
        Always ``None``.
    """
    return None
def classify_words(words: list[str], deep: bool = False):
    """Placeholder that is not implemented yet.

    Args:
        words: Currently ignored.
        deep: Currently ignored.

    Returns:
        Always ``None``.
    """
    return None
|