Spaces:
Sleeping
Sleeping
import os | |
import base64 | |
from langchain.docstore.document import Document | |
from langchain.text_splitter import CharacterTextSplitter | |
from langchain.llms.openai import OpenAI | |
from langchain.chains.summarize import load_summarize_chain | |
from langchain.document_loaders import UnstructuredURLLoader | |
import nltk | |
import openai | |
# Fetch the NLTK "punkt" tokenizer data when this module is imported.
# NOTE(review): presumably needed by the URL loader's text partitioning -- confirm.
nltk.download("punkt")

# API credentials come from the environment; each is None when the variable is unset.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
SCRAP_API_KEY = os.environ.get("SCRAP_API_KEY")
def create_brand_html(brand_link):
    """Load a web page and split its text into LangChain documents.

    Args:
        brand_link: URL of the page to load.

    Returns:
        A list of ``Document`` objects, one per chunk produced by a
        ``CharacterTextSplitter`` configured with ``chunk_size=3000`` and
        ``chunk_overlap=200``. The list is empty when the loader returns
        no documents for the URL.
    """
    loaded = UnstructuredURLLoader(urls=[brand_link]).load()
    if not loaded:
        # The loader may return nothing (e.g. when the page could not be
        # fetched); indexing [0] here would raise IndexError.
        return []

    splitter = CharacterTextSplitter(
        chunk_size=3000,
        chunk_overlap=200,
        length_function=len,
    )
    # Only one URL is requested, so only the first loaded document is split.
    chunks = splitter.split_text(loaded[0].page_content)
    return [Document(page_content=chunk) for chunk in chunks]
def create_langchain_openai_query(docs):
    """Summarize documents with an OpenAI-backed map-reduce chain.

    Args:
        docs: Documents to summarize, passed straight to the chain's ``run``.

    Returns:
        Whatever the summarize chain's ``run`` call returns for ``docs``.
    """
    # Also sets the key on the ``openai`` module itself, as a module-wide side effect.
    openai.api_key = OPENAI_API_KEY
    summarizer = load_summarize_chain(
        OpenAI(temperature=0, openai_api_key=openai.api_key),
        chain_type="map_reduce",
    )
    return summarizer.run(docs)
def create_screenshot_from_scrap_fly(link_to_fetch):
    """Capture a full-page screenshot of a URL via the Scrapfly API and save it.

    Args:
        link_to_fetch: URL of the page to capture.

    Returns:
        On success, ``{"location": <path of the saved file>, "success": True}``.
        On any failure (request error, non-2xx response, file write error),
        ``{"success": False, "error": <the exception instance>}``.
    """
    import requests
    import random

    try:
        params = {
            'key': SCRAP_API_KEY,
            'url': link_to_fetch,
            'auto_scroll': True,
            'capture': 'fullpage',
            'options': 'block_banners'
        }
        # Bound the wait so a stalled connection cannot hang the caller forever.
        response = requests.get(
            'https://api.scrapfly.io/screenshot',
            params=params,
            timeout=180,
        )
        # Without this, an API error body would be saved as if it were an image
        # and reported as a success.
        response.raise_for_status()

        # NOTE(review): the request does not set an image format, so the bytes
        # may not be PNG despite the extension -- confirm against the Scrapfly docs.
        location = f"brand_ss_{random.randint(1, 100000000)}.png"
        # Write to the path that is returned; the file was previously written
        # to a fixed 'screenshot.jpg', so the returned location never existed.
        with open(location, 'wb') as file:
            file.write(response.content)
        return {"location": location, "success": True}
    except Exception as e:
        # Deliberately broad: callers receive a failure dict rather than an exception.
        return {"success": False, "error": e}