Paul-Joshi committed on
Commit
d133f54
·
verified ·
1 Parent(s): 7791973

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -10
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import streamlit as st
2
  from dotenv import load_dotenv
3
  from PyPDF2 import PdfReader
4
- from langchain.text_splitter import CharacterTextSplitter
5
  from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings
6
  from langchain.vectorstores import FAISS
7
  from langchain.chat_models import ChatOpenAI
@@ -22,15 +22,8 @@ def method_get_pdf_text(pdf_docs):
22
 
23
 
24
  def method_get_text_chunks(text):
25
- text_splitter = CharacterTextSplitter()
26
- # (
27
- # separator="\n\n",
28
- # chunk_size=1000,
29
- # chunk_overlap=200,
30
- # length_function=len,
31
- # is_separator_regex=False,
32
- # )
33
- chunks = text_splitter.split_text(text)
34
  return chunks
35
 
36
 
 
1
  import streamlit as st
2
  from dotenv import load_dotenv
3
  from PyPDF2 import PdfReader
4
+ from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
5
  from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings
6
  from langchain.vectorstores import FAISS
7
  from langchain.chat_models import ChatOpenAI
 
22
 
23
 
24
  def method_get_text_chunks(text):
25
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=7500, chunk_overlap=100)
26
+ doc_splits = text_splitter.split_documents(text)
 
 
 
 
 
 
 
27
  return chunks
28
 
29