seawolf2357 committed on
Commit
6eaab32
·
verified ·
1 Parent(s): f9be081

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -11
app.py CHANGED
@@ -8,6 +8,8 @@ from datasets import load_dataset
8
  import pandas as pd
9
  from fuzzywuzzy import process
10
 
 
 
11
  # 현재 작업 디렉토리 출력
12
  print("Current Working Directory:", os.getcwd())
13
 
@@ -28,16 +30,12 @@ else:
28
  def load_optimized_dataset(data_files):
29
  data_frames = [pd.read_csv(file) for file in data_files]
30
  full_data = pd.concat(data_frames, ignore_index=True)
 
 
31
  # 사건명을 키로 하고 사건번호와 전문을 저장하는 딕셔너리 생성
32
  name_to_number = full_data.groupby('์‚ฌ๊ฑด๋ช…')['์‚ฌ๊ฑด๋ฒˆํ˜ธ'].apply(list).to_dict()
33
  summary_to_number = full_data.groupby('ํŒ์‹œ์‚ฌํ•ญ')['์‚ฌ๊ฑด๋ฒˆํ˜ธ'].apply(list).to_dict()
34
  number_to_fulltext = full_data.set_index('์‚ฌ๊ฑด๋ฒˆํ˜ธ')['์ „๋ฌธ'].to_dict()
35
-
36
- # 디버깅용 로깅
37
- logging.debug(f"Sample entries in name_to_number: {list(name_to_number.items())[:3]}")
38
- logging.debug(f"Sample entries in summary_to_number: {list(summary_to_number.items())[:3]}")
39
- logging.debug(f"Sample entries in number_to_fulltext: {list(number_to_fulltext.items())[:3]}")
40
-
41
  return name_to_number, summary_to_number, number_to_fulltext
42
 
43
  name_to_number, summary_to_number, number_to_fulltext = load_optimized_dataset(data_files)
@@ -112,7 +110,7 @@ async def generate_response(message):
112
  user_mention = message.author.mention
113
 
114
  # 유사한 사건명 및 판시사항 찾기
115
- matched_case_names = process.extractBests(user_input, all_case_names, limit=3, score_cutoff=70) # score_cutoff ๊ฐ’ ์กฐ์ •
116
  matched_case_summaries = process.extractBests(user_input, all_case_summaries, limit=3, score_cutoff=70)
117
 
118
  logging.debug(f"Matched case names: {matched_case_names}")
@@ -121,15 +119,15 @@ async def generate_response(message):
121
  if matched_case_names:
122
  case_numbers = []
123
  for case_name, score in matched_case_names:
124
- case_numbers.extend(name_to_number[case_name])
125
- case_numbers = list(set(case_numbers)) # ์ค‘๋ณต ์ œ๊ฑฐ
126
  case_numbers_str = "\n".join(case_numbers)
127
  system_message = f"{user_mention}, '{user_input}'์™€ ์œ ์‚ฌํ•œ ์‚ฌ๊ฑด๋ช…์˜ ์‚ฌ๊ฑด๋ฒˆํ˜ธ๋Š” ๋‹ค์Œ๊ณผ ๊ฐ™์Šต๋‹ˆ๋‹ค:\n{case_numbers_str}"
128
  elif matched_case_summaries:
129
  case_numbers = []
130
  for case_summary, score in matched_case_summaries:
131
- case_numbers.extend(summary_to_number[case_summary])
132
- case_numbers = list(set(case_numbers)) # ์ค‘๋ณต ์ œ๊ฑฐ
133
  case_numbers_str = "\n".join(case_numbers)
134
  system_message = f"{user_mention}, '{user_input}'์™€ ์œ ์‚ฌํ•œ ํŒ์‹œ์‚ฌํ•ญ์˜ ์‚ฌ๊ฑด๋ฒˆํ˜ธ๋Š” ๋‹ค์Œ๊ณผ ๊ฐ™์Šต๋‹ˆ๋‹ค:\n{case_numbers_str}"
135
  elif user_input in number_to_fulltext:
 
8
  import pandas as pd
9
  from fuzzywuzzy import process
10
 
11
+
12
+
13
  # 현재 작업 디렉토리 출력
14
  print("Current Working Directory:", os.getcwd())
15
 
 
30
  def load_optimized_dataset(data_files):
31
  data_frames = [pd.read_csv(file) for file in data_files]
32
  full_data = pd.concat(data_frames, ignore_index=True)
33
+ # 확인을 위한 데이터 샘플 출력
34
+ logging.debug(f"Data sample: {full_data[['์‚ฌ๊ฑด๋ช…', '์‚ฌ๊ฑด๋ฒˆํ˜ธ', 'ํŒ์‹œ์‚ฌํ•ญ']].head()}")
35
  # 사건명을 키로 하고 사건번호와 전문을 저장하는 딕셔너리 생성
36
  name_to_number = full_data.groupby('์‚ฌ๊ฑด๋ช…')['์‚ฌ๊ฑด๋ฒˆํ˜ธ'].apply(list).to_dict()
37
  summary_to_number = full_data.groupby('ํŒ์‹œ์‚ฌํ•ญ')['์‚ฌ๊ฑด๋ฒˆํ˜ธ'].apply(list).to_dict()
38
  number_to_fulltext = full_data.set_index('์‚ฌ๊ฑด๋ฒˆํ˜ธ')['์ „๋ฌธ'].to_dict()
 
 
 
 
 
 
39
  return name_to_number, summary_to_number, number_to_fulltext
40
 
41
  name_to_number, summary_to_number, number_to_fulltext = load_optimized_dataset(data_files)
 
110
  user_mention = message.author.mention
111
 
112
  # 유사한 사건명 및 판시사항 찾기
113
+ matched_case_names = process.extractBests(user_input, all_case_names, limit=3, score_cutoff=70)
114
  matched_case_summaries = process.extractBests(user_input, all_case_summaries, limit=3, score_cutoff=70)
115
 
116
  logging.debug(f"Matched case names: {matched_case_names}")
 
119
  if matched_case_names:
120
  case_numbers = []
121
  for case_name, score in matched_case_names:
122
+ case_numbers.extend(name_to_number.get(case_name, []))
123
+ case_numbers = list(set(case_numbers))
124
  case_numbers_str = "\n".join(case_numbers)
125
  system_message = f"{user_mention}, '{user_input}'์™€ ์œ ์‚ฌํ•œ ์‚ฌ๊ฑด๋ช…์˜ ์‚ฌ๊ฑด๋ฒˆํ˜ธ๋Š” ๋‹ค์Œ๊ณผ ๊ฐ™์Šต๋‹ˆ๋‹ค:\n{case_numbers_str}"
126
  elif matched_case_summaries:
127
  case_numbers = []
128
  for case_summary, score in matched_case_summaries:
129
+ case_numbers.extend(summary_to_number.get(case_summary, []))
130
+ case_numbers = list(set(case_numbers))
131
  case_numbers_str = "\n".join(case_numbers)
132
  system_message = f"{user_mention}, '{user_input}'์™€ ์œ ์‚ฌํ•œ ํŒ์‹œ์‚ฌํ•ญ์˜ ์‚ฌ๊ฑด๋ฒˆํ˜ธ๋Š” ๋‹ค์Œ๊ณผ ๊ฐ™์Šต๋‹ˆ๋‹ค:\n{case_numbers_str}"
133
  elif user_input in number_to_fulltext: