hyonee committed on
Commit
62f60fd
•
1 Parent(s): 0a87e57
Files changed (1) hide show
  1. app.py +37 -9
app.py CHANGED
@@ -30,19 +30,47 @@ def get_pdf_text(pdf_docs):
30
  # 아래 텍스트 추출 함수를 작성
31
 
32
  def get_text_file(docs):
33
- text_loader = TextLoader(docs) # TextLoader๋ฅผ ์‚ฌ์šฉํ•ด ํ…์ŠคํŠธ ํŒŒ์ผ์„ ๋กœ๋“œํ•ฉ๋‹ˆ๋‹ค.
34
- text_doc = text_loader.load() # 텍스트를 추출합니다.
35
- return text_doc
 
 
 
 
 
 
36
 
37
  def get_csv_file(docs):
38
- csv_loader = CSVLoader(docs) # CSVLoader๋ฅผ ์‚ฌ์šฉํ•ด CSV ํŒŒ์ผ์„ ๋กœ๋“œํ•ฉ๋‹ˆ๋‹ค.
39
- csv_doc = csv_loader.load() # 텍스트를 추출합니다.
40
- return csv_doc
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
  def get_json_file(docs):
43
- json_loader = JSONLoader(docs) # JSONLoader๋ฅผ ์‚ฌ์šฉํ•ด JSON ํŒŒ์ผ์„ ๋กœ๋“œํ•ฉ๋‹ˆ๋‹ค.
44
- json_doc = json_loader.load() # 텍스트를 추출합니다.
45
- return json_doc
 
 
 
 
 
 
 
 
 
46
 
47
 
48
  # 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.
 
30
  # 아래 텍스트 추출 함수를 작성
31
 
32
def get_text_file(docs):
    """Return the text in ``docs`` after a round trip through a temporary UTF-8 file.

    Args:
        docs: Text content as a ``str``.

    Returns:
        The string read back from the temporary file. Both the write and the
        read use Python's default text-mode newline handling, so carriage
        returns in the input may come back normalized to a plain newline.

    Raises:
        TypeError: If ``docs`` is not a ``str`` (raised by the text-mode write).
    """
    # The context manager removes the directory (and the file inside it) as
    # soon as the text has been read back, instead of leaving the cleanup to
    # the TemporaryDirectory finalizer.
    with tempfile.TemporaryDirectory() as temp_dir:
        temp_filepath = os.path.join(temp_dir, "text_file.txt")

        # Write the text out to the temporary file.
        with open(temp_filepath, "w", encoding="utf-8") as f:
            f.write(docs)

        # Read the same file back and hand the content to the caller.
        with open(temp_filepath, "r", encoding="utf-8") as f:
            return f.read()
41
+
42
 
43
def get_csv_file(docs):
    """Split comma-separated lines into rows via a temporary CSV file.

    Args:
        docs: Either an iterable of ``str`` lines, or a single ``str`` holding
            the whole CSV text (it is split into lines first).

    Returns:
        A list of rows. Each row is the list of strings obtained by splitting
        one input line on ``","``.
    """
    # A bare string would otherwise be iterated character by character,
    # producing one bogus row per character.
    lines = docs.splitlines() if isinstance(docs, str) else docs

    # The context manager guarantees the temporary directory is removed once
    # the rows have been read back.
    with tempfile.TemporaryDirectory() as temp_dir:
        temp_filepath = os.path.join(temp_dir, "csv_file.csv")

        # Write every line as one CSV row. This is a plain comma split, so
        # quoted fields that contain commas are not treated specially.
        with open(temp_filepath, "w", newline="", encoding="utf-8") as csv_file:
            csv.writer(csv_file).writerows(line.split(",") for line in lines)

        # newline="" leaves newline handling to the csv module, so line
        # breaks inside quoted fields are read back unchanged.
        with open(temp_filepath, "r", newline="", encoding="utf-8") as csv_file:
            return list(csv.reader(csv_file))
60
 
61
def get_json_file(docs):
    """Return ``docs`` after a JSON round trip through a temporary file.

    Args:
        docs: Any value that ``json.dump`` can serialize.

    Returns:
        The value ``json.load`` reads back from the temporary file. JSON has
        fewer types than Python, so e.g. tuples come back as lists and
        non-string dict keys come back as strings.

    Raises:
        TypeError: If ``docs`` contains a value that is not JSON serializable.
    """
    # The context manager removes the temporary directory as soon as the data
    # has been loaded, rather than relying on the finalizer.
    with tempfile.TemporaryDirectory() as temp_dir:
        temp_filepath = os.path.join(temp_dir, "json_file.json")

        # Serialize the data into the temporary JSON file.
        with open(temp_filepath, "w", encoding="utf-8") as json_file:
            json.dump(docs, json_file, indent=2)

        # Parse the file back into Python objects.
        with open(temp_filepath, "r", encoding="utf-8") as json_file:
            return json.load(json_file)
74
 
75
 
76
  # 문서들을 처리하여 텍스트 청크로 나누는 함수입니다.