File size: 3,506 Bytes
dacef8c
 
 
d4c182e
7076d7a
dacef8c
 
d4c182e
8dd92a1
dacef8c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7076d7a
 
 
 
 
 
 
 
 
 
 
 
 
 
dacef8c
 
 
 
ca2193d
 
 
 
 
 
d4c182e
ca2193d
 
 
 
d4c182e
ca2193d
 
 
 
d4c182e
ca2193d
d4c182e
 
dacef8c
ca2193d
dacef8c
 
1b68a38
dacef8c
 
 
 
 
 
 
fbef45a
dacef8c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bd9e29c
dacef8c
 
 
 
 
 
 
 
 
19ac67f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import lang
import requests
import json
import httpx
#import aiofiles
from imgsearch import search_img
from re import match, sub
from env import HOST_URL
from PIL import Image
from env import LLM_API_KEY

def remove_numbering(text):
    """Delete every "<digits>." marker from a string.

    A marker is one or more digits followed by a dot and, if present, a single
    whitespace character after the dot. Markers are removed wherever they
    occur in the string, not only at the beginning.

    Args:
        text: The string to clean up.

    Returns:
        A copy of ``text`` with all such markers removed.
    """
    # Digits, a literal dot, then at most one whitespace character.
    return sub(r"\d+\.\s?", "", text)

async def tldr(content, l=lang.VI_VN):
    """Request a tl;dr of ``content`` from the AwanLLM chat-completions API.

    Args:
        content: Text to summarise; interpolated into the user prompt.
        l: Language identifier interpolated into the prompt. Defaults to
            ``lang.VI_VN``.

    Returns:
        The model's reply with surrounding whitespace stripped. If the reply
        spans several lines, everything up to and including the first newline
        is discarded (presumably a preamble line from the model - confirm
        against real responses); a single-line reply is returned whole.
    """
    payload = {
        "model": "Meta-Llama-3-8B-Instruct",
        "messages": [
            {"role": "user", "content": f"tl;dr in {l}: {content}"}
        ],
        "presence_penalty": 0.3,
        "temperature": 0.55,
    }
    async with httpx.AsyncClient() as client:
        # timeout=None is an argument of the HTTP call. It used to be passed to
        # json.dumps(), which forwards unknown keywords to JSONEncoder and
        # raises TypeError, so the request was never sent.
        # The body is already serialised, so it goes through ``content=``;
        # httpx deprecates passing a string via ``data=``.
        _r = await client.post(
            url="https://api.awanllm.com/v1/chat/completions",
            headers={'Content-Type': 'application/json', 'Authorization': f'Bearer {LLM_API_KEY}'},
            content=json.dumps(payload),
            timeout=None,
        )
    _summary = _r.json()
    return _summary['choices'][0]['message']['content'].split('\n', 1)[-1].strip()

# import io
# import asyncio
# from functools import partial
# def optimize_image(data, _fn):
#     with Image.open(io.BytesIO(data)) as img:
#         img.save(f'{_fn}.webp', optimize=True)

# async def download_img(_url, _prefix=HOST_URL):
#     _r = None
#     async with httpx.AsyncClient() as client:
#         _r = await client.get(url=_url, timeout=None)

#     _fn = f'img_{str(hash(_url))}'
#     if _r.status_code//100 == 2:
#         _loop = asyncio.get_running_loop()
#         _out = await _loop.run_in_executor(None, partial(optimize_image, _r.content, _fn))
        
#     return f"{_prefix}/images/{_fn}.webp"

async def fetch_img_for_words(words: list[str], __url_prefix=None):
    """Pair every word with the result of ``search_img`` for that word.

    Args:
        words: Words to look up.
        __url_prefix: Not used by this function.

    Returns:
        A list of ``(word, image)`` tuples in the order of ``words``, where
        ``image`` is whatever ``search_img`` returned for that word.
    """
    print("fetching images...")
    # Run all lookups first, then pair them up with the words.
    found = list(map(search_img, words))
    return list(zip(words, found))

async def get_definitions_from_words(words: list[str], summary: str = "", lang: str = lang.VI_VN):
    """Ask the AwanLLM chat-completions API to define ``words`` and parse the reply.

    The reply is split into lines; each line is split at its first ``:`` into a
    keyword part and a definition part. Lines without a colon or with an empty
    definition are skipped.

    Args:
        words: Keywords to define; joined with ``"; "`` inside the prompt.
        summary: Text placed at the start of the prompt, before the request
            for definitions.
        lang: Language identifier interpolated into the prompt. Defaults to
            ``lang.VI_VN`` of the module-level ``lang`` import, which this
            parameter shadows inside the function body.

    Returns:
        A list of ``(keyword, definition)`` tuples, where the keyword has
        ``*`` characters and "<digits>." numbering removed. The last parsed
        pair is always dropped (see the note on the return statement).
    """
    print("running inferrence")
    payload = {
        "model": "Meta-Llama-3-8B-Instruct",
        "messages": [
            {"role": "user", "content": f"{summary}. Give concise, easy-to-understand definitions for the following keywords: {'; '.join(words)}. DO NOT include the keywords inside their respective definition. Use {lang}."}
        ],
        "presence_penalty": 0.3,
        "temperature": 0.55,
    }
    # Use the async HTTP client so the event loop is not blocked while waiting
    # for the model. timeout=None keeps the previous "wait indefinitely"
    # behaviour, because httpx would otherwise apply its own default timeout.
    async with httpx.AsyncClient() as client:
        _r = await client.post(
            url="https://api.awanllm.com/v1/chat/completions",
            headers={'Content-Type': 'application/json', 'Authorization': f'Bearer {LLM_API_KEY}'},
            content=json.dumps(payload),
            timeout=None,
        )

    # Decode the response once and reuse the lines for logging and parsing.
    lines = _r.json()['choices'][0]['message']['content'].split('\n')
    print(lines)

    rets = []
    for _x in lines:
        # Split at the first colon only, so a definition that itself contains
        # ':' is kept instead of being discarded.
        k, sep, v = _x.partition(':')
        if not sep:
            continue
        k = remove_numbering(k.replace('*', '').strip())
        v = v.strip()
        # NOTE(review): k never contains ':' after the split, so this "note: "
        # test cannot match a "Note: ..." line; kept unchanged on purpose.
        if v != '' and "note: " not in k.lower():
            rets.append((k, v))
    # NOTE(review): the final parsed pair is always discarded - presumably to
    # drop a trailing remark from the model. Confirm, since a real definition
    # is lost whenever the reply has no such trailing line.
    return rets[:-1]

def get_imgs_from_words(words: list[str]):
    """Placeholder that is not implemented yet; ignores ``words`` and returns None."""
    return None

def classify_words(words: list[str], deep: bool = False):
    """Placeholder that is not implemented yet; ignores its arguments and returns None."""
    return None