Update app.py
Browse files
app.py
CHANGED
@@ -13,6 +13,8 @@ from langchain.llms import HuggingFaceHub, LlamaCpp, CTransformers # For loadin
|
|
13 |
from langchain.document_loaders import PyPDFLoader, TextLoader, JSONLoader, CSVLoader
|
14 |
import tempfile  # Library for creating temporary files.
|
15 |
import os
|
|
|
|
|
16 |
|
17 |
|
18 |
# Function that extracts text from PDF documents.
|
@@ -29,13 +31,31 @@ def get_pdf_text(pdf_docs):
|
|
29 |
# Write the text extraction functions below.
|
30 |
|
31 |
def get_text_file(docs):
|
32 |
-
|
|
|
|
|
|
|
|
|
33 |
|
34 |
def get_csv_file(docs):
|
35 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
|
37 |
def get_json_file(docs):
|
38 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
|
40 |
|
41 |
# Function that processes the documents and splits them into text chunks.
|
|
|
13 |
from langchain.document_loaders import PyPDFLoader, TextLoader, JSONLoader, CSVLoader
|
14 |
import tempfile  # Library for creating temporary files.
|
15 |
import os
|
16 |
+
import pandas as pd
|
17 |
+
import json
|
18 |
|
19 |
|
20 |
# Function that extracts text from PDF documents.
|
|
|
31 |
# Write the text extraction functions below.
|
32 |
|
33 |
def get_text_file(docs):
    """Return the decoded text of each uploaded plain-text document.

    Args:
        docs: Iterable of file-like objects exposing ``getvalue()`` that
            returns the raw bytes (``io.BytesIO`` works; presumably these
            are the app's uploaded files -- confirm against the caller).

    Returns:
        A list with one ``str`` per input document, in input order.

    Raises:
        UnicodeDecodeError: If a document is not valid UTF-8.
    """
    # "utf-8-sig" decodes plain UTF-8 identically but also strips a leading
    # byte-order mark, which would otherwise leak into the text as "\ufeff".
    return [doc.getvalue().decode('utf-8-sig') for doc in docs]
|
39 |
|
40 |
def get_csv_file(docs):
    """Return the first-column text of each uploaded CSV document.

    Each file is parsed with ``pandas.read_csv`` and the values of its first
    column are joined with single spaces. Only the first column is used; this
    is the example extraction rule and can be adapted to other columns.

    Args:
        docs: Iterable of paths or file-like objects accepted by
            ``pandas.read_csv``.

    Returns:
        A list with one ``str`` per input document, in input order. An empty
        file (nothing to parse) yields an empty string.
    """
    text_list = []
    for doc in docs:
        try:
            df = pd.read_csv(doc)
        except pd.errors.EmptyDataError:
            # An empty upload has no columns to parse; keep one (empty) entry
            # per document instead of aborting the whole batch.
            text_list.append('')
            continue
        # Drop missing cells so they are not emitted as the literal "nan".
        first_column = df.iloc[:, 0].dropna()
        text_list.append(' '.join(first_column.astype(str).tolist()))
    return text_list
|
49 |
|
50 |
def get_json_file(docs):
    """Return the text stored under the ``'text'`` key of each JSON document.

    The top-level JSON value may be a single object or an array of objects.
    For an array, the ``'text'`` values of its object elements are joined
    with single spaces. The key name is the example extraction rule and can
    be changed to whichever key holds the text.

    Args:
        docs: Iterable of file-like objects readable by ``json.load``.

    Returns:
        A list with one ``str`` per input document, in input order. A
        document without any ``'text'`` value yields an empty string.

    Raises:
        json.JSONDecodeError: If a document is not valid JSON.
    """
    text_list = []
    for doc in docs:
        data = json.load(doc)
        # Treat a lone object as a one-element array so both shapes share a
        # single code path; a top-level array has no ``.get``.
        records = data if isinstance(data, list) else [data]
        parts = [
            str(record['text'])  # coerce so the result is always text
            for record in records
            if isinstance(record, dict) and record.get('text') is not None
        ]
        text_list.append(' '.join(parts))
    return text_list
|
59 |
|
60 |
|
61 |
# Function that processes the documents and splits them into text chunks.
|