Spaces:

nijoow
/

openai_space

Runtime error

App Files Community

nijoow committed on Nov 21, 2023

Commit

9b75908

1 Parent(s): 6934747

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -31

app.py CHANGED Viewed

@@ -27,41 +27,50 @@ def get_pdf_text(pdf_docs):
 # 과제
 # 아래 텍스트 추출 함수를 작성
-def get_text_file(docs):
-    text_content = ""
-    # Hugging Face 모델을 로드합니다. (예시로 'bert-base-uncased' 모델을 사용합니다)
-    model_name = 'bert-base-uncased'
-    model = HuggingFaceEmbeddings(model_name)
-    # 텍스트 추출을 위해 문서를 로드합니다.
-    text_loader = TextLoader(docs.getvalue().decode('utf-8'))
-    document = text_loader.load()
 import csv
 import json
-from tempfile import NamedTemporaryFile
-def get_csv_file(docs):
-    with NamedTemporaryFile(delete=False) as temp_file:
-        temp_file.write(docs.getvalue().encode('utf-8'))
-        temp_file.seek(0)
-        csv_data = []
-        csv_reader = csv.reader(temp_file)
-        for row in csv_reader:
-            csv_data.append(row)
-    return csv_data
-def get_json_file(docs):
-    with NamedTemporaryFile(delete=False) as temp_file:
-        temp_file.write(docs.getvalue().encode('utf-8'))
-        temp_file.seek(0)
-        json_data = json.load(temp_file)
-    return json_data

 # 과제
 # 아래 텍스트 추출 함수를 작성
 import csv
 import json
+from PyPDF2 import PdfReader
+def get_text_from_document(doc_path):
+    # 문서의 확장자에 따라 다른 처리를 수행
+    if doc_path.endswith('.txt'):
+        # 텍스트 파일의 경우
+        with open(doc_path, 'r', encoding='utf-8') as file:
+            text = file.read()
+        return text
+    elif doc_path.endswith('.csv'):
+        # CSV 파일의 경우
+        with open(doc_path, 'r', encoding='utf-8') as file:
+            csv_reader = csv.reader(file)
+            text = '\n'.join(','.join(row) for row in csv_reader)
+        return text
+    elif doc_path.endswith('.json'):
+        # JSON 파일의 경우
+        with open(doc_path, 'r', encoding='utf-8') as file:
+            data = json.load(file)
+            text = json.dumps(data, indent=2)
+        return text
+    elif doc_path.endswith('.pdf'):
+        # PDF 파일의 경우
+        with open(doc_path, 'rb') as file:
+            pdf_reader = PdfReader(file)
+            text = ''
+            for page_num in range(len(pdf_reader.pages)):
+                text += pdf_reader.pages[page_num].extract_text()
+        return text
+    else:
+        # 지원하지 않는 확장자인 경우
+        raise ValueError(f"Unsupported file extension: {doc_path}")
+def get_text_file(docs):
+    # 각 문서에 대한 처리를 수행
+    for doc in docs:
+        try:
+            text = get_text_from_document(doc)
+            # 여기에서 text를 사용하여 추가적인 전처리 로직을 수행할 수 있음
+            print(f"Text from {doc}:\n{text}\n")
+        except ValueError as e:
+            print(f"Error processing {doc}: {e}")