LeeRuben committed on
Commit
197a291
·
1 Parent(s): e00b8ee

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -3
app.py CHANGED
@@ -13,6 +13,8 @@ from langchain.llms import HuggingFaceHub, LlamaCpp, CTransformers # For loadin
13
  from langchain.document_loaders import PyPDFLoader, TextLoader, JSONLoader, CSVLoader
14
  import tempfile # 임시 파일을 생성하기 위한 라이브러리입니다.
15
  import os
 
 
16
 
17
 
18
  # PDF 문서로부터 텍스트를 추출하는 함수입니다.
@@ -29,13 +31,31 @@ def get_pdf_text(pdf_docs):
29
  # 아래 텍스트 추출 함수를 작성
30
 
31
  def get_text_file(docs):
32
- pass
 
 
 
 
33
 
34
  def get_csv_file(docs):
35
- pass
 
 
 
 
 
 
 
36
 
37
  def get_json_file(docs):
38
- pass
 
 
 
 
 
 
 
39
 
40
 
41
  # 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.
 
13
  from langchain.document_loaders import PyPDFLoader, TextLoader, JSONLoader, CSVLoader
14
  import tempfile # 임시 파일을 생성하기 위한 라이브러리입니다.
15
  import os
16
+ import pandas as pd
17
+ import json
18
 
19
 
20
  # PDF 문서로부터 텍스트를 추출하는 함수입니다.
 
31
  # 아래 텍스트 추출 함수를 작성
32
 
33
  def get_text_file(docs):
34
+ text_list = []
35
+ for doc in docs:
36
+ text = doc.getvalue().decode('utf-8') # UTF-8 ν˜•μ‹μœΌλ‘œ μΈμ½”λ”©λœ ν…μŠ€νŠΈ 읽기
37
+ text_list.append(text)
38
+ return text_list
39
 
40
  def get_csv_file(docs):
41
+ text_list = []
42
+ for doc in docs:
43
+ df = pd.read_csv(doc)
44
+ # CSV 파일의 νŠΉμ • μ—΄μ΄λ‚˜ ν–‰μ—μ„œ ν…μŠ€νŠΈ μΆ”μΆœ
45
+ # μ˜ˆμ‹œ: 첫 번째 μ—΄μ˜ 데이터λ₯Ό ν…μŠ€νŠΈλ‘œ λ³€ν™˜
46
+ text = ' '.join(df.iloc[:, 0].astype(str).tolist())
47
+ text_list.append(text)
48
+ return text_list
49
 
50
  def get_json_file(docs):
51
+ text_list = []
52
+ for doc in docs:
53
+ data = json.load(doc)
54
+ # JSON 파일의 ν•„μš”ν•œ ν‚€μ—μ„œ ν…μŠ€νŠΈ μΆ”μΆœ
55
+ # μ˜ˆμ‹œ: 'text' ν‚€μ—μ„œ ν…μŠ€νŠΈ μΆ”μΆœ
56
+ text = data.get('text', '') # ν•„μš”ν•œ ν‚€ μž…λ ₯
57
+ text_list.append(text)
58
+ return text_list
59
 
60
 
61
  # 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.