File size: 1,184 Bytes
8bbd0a7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import os
import base64
from langchain.docstore.document import Document
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms.openai import OpenAI
from langchain.chains.summarize import load_summarize_chain
from langchain.document_loaders import UnstructuredURLLoader
import nltk
import openai

# Download the NLTK 'punkt' tokenizer data when this module is imported.
# NOTE(review): presumably required by the URL loader's text partitioning --
# confirm before removing or moving this import-time side effect.
nltk.download('punkt')

# The API key is read from the environment instead of being committed to
# source control. A key was previously hard-coded here; it must be treated
# as leaked and revoked. The value is None when the variable is unset.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")


def create_brand_html(brand_link, chunk_size=3000, chunk_overlap=200):
    """Load the page at ``brand_link`` and split its text into documents.

    Args:
        brand_link: URL handed to ``UnstructuredURLLoader`` as its only entry.
        chunk_size: Passed to ``CharacterTextSplitter`` as ``chunk_size``;
            lengths are measured with ``len``. Defaults to 3000.
        chunk_overlap: Passed to ``CharacterTextSplitter`` as
            ``chunk_overlap``. Defaults to 200.

    Returns:
        A list of ``Document`` objects, one per chunk of the first loaded
        document's ``page_content``. The list is empty when the loader
        returns no documents.
    """
    loaded = UnstructuredURLLoader(urls=[brand_link]).load()
    if not loaded:
        # The loader can come back empty (e.g. the fetch failed and the
        # loader skipped the URL); indexing [0] would raise IndexError.
        return []

    splitter = CharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )
    # Only one URL is requested, so only the first loaded document is split.
    return [
        Document(page_content=chunk)
        for chunk in splitter.split_text(loaded[0].page_content)
    ]


def create_langchain_openai_query(docs):
    """Summarize ``docs`` with a LangChain "map_reduce" summarize chain.

    Args:
        docs: Documents handed to the chain's ``run`` method.

    Returns:
        Whatever the chain's ``run`` call returns for ``docs``.

    Side effects:
        Assigns the module-level ``OPENAI_API_KEY`` to ``openai.api_key``.
    """
    # Set the key on the openai module first; the LLM below reads it back.
    openai.api_key = OPENAI_API_KEY
    summarizer = load_summarize_chain(
        OpenAI(temperature=0, openai_api_key=openai.api_key),
        chain_type="map_reduce",
    )
    return summarizer.run(docs)