hyonee committed on
Commit
48d4ded
•
1 Parent(s): 62f60fd
Files changed (1) hide show
  1. app.py +25 -32
app.py CHANGED
@@ -30,47 +30,40 @@ def get_pdf_text(pdf_docs):
30
  # 아래 텍스트 추출 함수를 작성
31
 
32
  def get_text_file(docs):
33
- def get_text_file(docs):
34
- temp_dir = tempfile.TemporaryDirectory() # 임시 디렉토리를 생성합니다.
35
- temp_filepath = os.path.join(temp_dir.name, "text_file.txt") # 임시 파일 경로를 생성합니다.
36
- with open(temp_filepath, "w", encoding="utf-8") as f: # 임시 파일을 텍스트 쓰기 모드로 엽니다.
37
- f.write(docs) # 텍스트 문서의 내용을 임시 파일에 씁니다.
38
- with open(temp_filepath, "r", encoding="utf-8") as f: # 임시 파일을 텍스트 읽기 모드로 엽니다.
39
- text_content = f.read() # 텍스트 파일의 내용을 읽습니다.
40
- return text_content # 추출한 텍스트를 반환합니다.
41
-
42
 
43
  def get_csv_file(docs):
44
- temp_dir = tempfile.TemporaryDirectory() # 임시 디렉토리를 생성합니다.
45
- temp_filepath = os.path.join(temp_dir.name, "csv_file.csv") # 임시 파일 경로를 생성합니다.
46
-
47
- # ν…μŠ€νŠΈ 데이터λ₯Ό CSV νŒŒμΌμ— μ“°κΈ°
48
- with open(temp_filepath, "w", newline="", encoding="utf-8") as csv_file:
49
- csv_writer = csv.writer(csv_file)
50
- for line in docs:
51
- # 여기서 line은 CSV 파일의 한 행을 나타냅니다.
52
- csv_writer.writerow(line.split(',')) # 예시: 쉼표로 구분된 데이터를 사용
53
 
54
- # CSV 파일에서 데이터 읽기
55
  with open(temp_filepath, "r", encoding="utf-8") as csv_file:
56
  csv_reader = csv.reader(csv_file)
57
  csv_data = [row for row in csv_reader]
58
 
59
- return csv_data # 읽어온 CSV 데이터를 반환합니다.
60
-
61
- def get_json_file(docs):
62
- temp_dir = tempfile.TemporaryDirectory() # 임시 디렉토리를 생성합니다.
63
- temp_filepath = os.path.join(temp_dir.name, "json_file.json") # 임시 파일 경로를 생성합니다.
64
-
65
- # ν…μŠ€νŠΈ 데이터λ₯Ό JSON νŒŒμΌμ— μ“°κΈ°
66
- with open(temp_filepath, "w", encoding="utf-8") as json_file:
67
- json.dump(docs, json_file, indent=2) # 데이터를 JSON 파일에 쓰기
68
 
69
- # JSON 파일에서 데이터 읽기
70
- with open(temp_filepath, "r", encoding="utf-8") as json_file:
71
- json_data = json.load(json_file)
 
 
 
 
72
 
73
- return json_data # 읽어온 JSON 데이터를 반환합니다.
74
 
75
 
76
  # λ¬Έμ„œλ“€μ„ μ²˜λ¦¬ν•˜μ—¬ ν…μŠ€νŠΈ 청크둜 λ‚˜λˆ„λŠ” ν•¨μˆ˜μž…λ‹ˆλ‹€.
 
30
  # 아래 텍스트 추출 함수를 작성
31
 
32
  def get_text_file(docs):
33
+ temp_dir = tempfile.TemporaryDirectory()
34
+ temp_filepath = os.path.join(temp_dir.name, text_docs.name)
35
+ with open(temp_filepath, "wb") as f:
36
+ f.write(text_docs.getvalue())
37
+ text_loader = TextLoader(temp_filepath)
38
+ text_content = text_loader.load()
39
+ temp_dir.cleanup()
40
+ return text_content
 
41
 
42
  def get_csv_file(docs):
43
+ temp_dir = tempfile.TemporaryDirectory()
44
+ temp_filepath = os.path.join(temp_dir.name, csv_docs.name)
45
+ with open(temp_filepath, "wb") as f:
46
+ f.write(csv_docs.getvalue())
47
+ csv_loader = CSVLoader(temp_filepath)
48
+ csv_content = csv_loader.load()
49
+ temp_dir.cleanup()
50
+ return csv_content
 
51
 
 
52
  with open(temp_filepath, "r", encoding="utf-8") as csv_file:
53
  csv_reader = csv.reader(csv_file)
54
  csv_data = [row for row in csv_reader]
55
 
56
+ return csv_data
 
 
 
 
 
 
 
 
57
 
58
+ def get_json_file(json_docs):
59
+ with tempfile.TemporaryDirectory() as temp_dir:
60
+ temp_filepath = os.path.join(temp_dir, "temp_file.json")
61
+ with open(temp_filepath, "wb") as f:
62
+ f.write(json_docs.getvalue())
63
+ json_loader = JSONLoader(temp_filepath)
64
+ json_content = json_loader.load()
65
 
66
+ return json_content
67
 
68
 
69
  # λ¬Έμ„œλ“€μ„ μ²˜λ¦¬ν•˜μ—¬ ν…μŠ€νŠΈ 청크둜 λ‚˜λˆ„λŠ” ν•¨μˆ˜μž…λ‹ˆλ‹€.