"""Fetch a web page, split it into chunks, and summarize it with OpenAI.

The module exposes two steps that are meant to be chained:
``create_brand_html`` loads a URL and returns LangChain ``Document`` chunks,
and ``create_langchain_openai_query`` runs a map-reduce summarize chain over
those chunks.
"""

import os
import base64

from langchain.docstore.document import Document
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms.openai import OpenAI
from langchain.chains.summarize import load_summarize_chain
from langchain.document_loaders import UnstructuredURLLoader
import nltk
import openai

# Import-time side effect kept from the original module: fetch the NLTK
# "punkt" data set.
# NOTE(review): presumably required by the unstructured URL loader -- confirm.
nltk.download('punkt')

# SECURITY: the API key must never be committed to source control. It is read
# from the environment instead. Any key that was previously hard-coded in this
# file should be treated as compromised and revoked.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")


def create_brand_html(brand_link, chunk_size=3000, chunk_overlap=200):
    """Load the page at *brand_link* and return it as a list of Documents.

    Despite the name, this returns LangChain ``Document`` objects holding
    chunks of the page's text content, not HTML.

    Args:
        brand_link: URL of the page to load.
        chunk_size: Maximum chunk length passed to ``CharacterTextSplitter``
            (measured with ``len``). Defaults to 3000.
        chunk_overlap: Overlap between consecutive chunks passed to
            ``CharacterTextSplitter``. Defaults to 200.

    Returns:
        A list of ``Document`` objects, one per text chunk. The list is empty
        when the loader returns no content for the URL.
    """
    loader = UnstructuredURLLoader(urls=[brand_link])
    data = loader.load()

    # The loader can come back empty; indexing data[0] would then raise an
    # unhelpful IndexError, so report "no documents" instead.
    if not data:
        return []

    text_splitter = CharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )
    # Only one URL is requested, so only the first loaded document is split.
    texts = text_splitter.split_text(data[0].page_content)
    return [Document(page_content=text) for text in texts]


def create_langchain_openai_query(docs):
    """Summarize *docs* with an OpenAI-backed map-reduce summarize chain.

    Args:
        docs: Documents to summarize, e.g. the list returned by
            ``create_brand_html``.

    Returns:
        The output of running the ``map_reduce`` summarize chain on *docs*.

    Raises:
        RuntimeError: If no API key is configured, either in the module-level
            ``OPENAI_API_KEY`` or in the ``OPENAI_API_KEY`` environment
            variable.
    """
    # Resolve the key at call time so a value set after import (module
    # attribute or environment variable) is still honored.
    api_key = OPENAI_API_KEY or os.environ.get("OPENAI_API_KEY", "")
    if not api_key:
        raise RuntimeError(
            "OpenAI API key is not configured; set the OPENAI_API_KEY "
            "environment variable."
        )

    # The original code also set the key on the openai module; kept so any
    # other code relying on openai.api_key continues to work.
    openai.api_key = api_key
    llm = OpenAI(temperature=0, openai_api_key=api_key)
    map_reduce_chain = load_summarize_chain(llm, chain_type="map_reduce")
    return map_reduce_chain.run(docs)