Spaces:
Running
Running
seawolf2357
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -8,6 +8,8 @@ from datasets import load_dataset
|
|
8 |
import pandas as pd
|
9 |
from fuzzywuzzy import process
|
10 |
|
|
|
|
|
11 |
# 현재 작업 디렉토리 출력
|
12 |
print("Current Working Directory:", os.getcwd())
|
13 |
|
@@ -28,16 +30,12 @@ else:
|
|
28 |
def load_optimized_dataset(data_files):
|
29 |
data_frames = [pd.read_csv(file) for file in data_files]
|
30 |
full_data = pd.concat(data_frames, ignore_index=True)
|
|
|
|
|
31 |
# 사건명을 키로 하고 사건번호와 전문을 저장하는 딕셔너리 생성
|
32 |
name_to_number = full_data.groupby('사건명')['사건번호'].apply(list).to_dict()
|
33 |
summary_to_number = full_data.groupby('판시사항')['사건번호'].apply(list).to_dict()
|
34 |
number_to_fulltext = full_data.set_index('사건번호')['전문'].to_dict()
|
35 |
-
|
36 |
-
# 디버깅용 로깅
|
37 |
-
logging.debug(f"Sample entries in name_to_number: {list(name_to_number.items())[:3]}")
|
38 |
-
logging.debug(f"Sample entries in summary_to_number: {list(summary_to_number.items())[:3]}")
|
39 |
-
logging.debug(f"Sample entries in number_to_fulltext: {list(number_to_fulltext.items())[:3]}")
|
40 |
-
|
41 |
return name_to_number, summary_to_number, number_to_fulltext
|
42 |
|
43 |
name_to_number, summary_to_number, number_to_fulltext = load_optimized_dataset(data_files)
|
@@ -112,7 +110,7 @@ async def generate_response(message):
|
|
112 |
user_mention = message.author.mention
|
113 |
|
114 |
# 유사한 사건명 및 판시사항 찾기
|
115 |
-
matched_case_names = process.extractBests(user_input, all_case_names, limit=3, score_cutoff=70)
|
116 |
matched_case_summaries = process.extractBests(user_input, all_case_summaries, limit=3, score_cutoff=70)
|
117 |
|
118 |
logging.debug(f"Matched case names: {matched_case_names}")
|
@@ -121,15 +119,15 @@ async def generate_response(message):
|
|
121 |
if matched_case_names:
|
122 |
case_numbers = []
|
123 |
for case_name, score in matched_case_names:
|
124 |
-
case_numbers.extend(name_to_number[
|
125 |
-
case_numbers = list(set(case_numbers))
|
126 |
case_numbers_str = "\n".join(case_numbers)
|
127 |
system_message = f"{user_mention}, '{user_input}'와 유사한 사건명의 사건번호는 다음과 같습니다:\n{case_numbers_str}"
|
128 |
elif matched_case_summaries:
|
129 |
case_numbers = []
|
130 |
for case_summary, score in matched_case_summaries:
|
131 |
-
case_numbers.extend(summary_to_number[
|
132 |
-
case_numbers = list(set(case_numbers))
|
133 |
case_numbers_str = "\n".join(case_numbers)
|
134 |
system_message = f"{user_mention}, '{user_input}'와 유사한 판시사항의 사건번호는 다음과 같습니다:\n{case_numbers_str}"
|
135 |
elif user_input in number_to_fulltext:
|
|
|
8 |
import pandas as pd
|
9 |
from fuzzywuzzy import process
|
10 |
|
11 |
+
|
12 |
+
|
13 |
# 현재 작업 디렉토리 출력
|
14 |
print("Current Working Directory:", os.getcwd())
|
15 |
|
|
|
30 |
def load_optimized_dataset(data_files):
|
31 |
data_frames = [pd.read_csv(file) for file in data_files]
|
32 |
full_data = pd.concat(data_frames, ignore_index=True)
|
33 |
+
# 확인을 위한 데이터 샘플 출력
|
34 |
+
logging.debug(f"Data sample: {full_data[['사건명', '사건번호', '판시사항']].head()}")
|
35 |
# 사건명을 키로 하고 사건번호와 전문을 저장하는 딕셔너리 생성
|
36 |
name_to_number = full_data.groupby('사건명')['사건번호'].apply(list).to_dict()
|
37 |
summary_to_number = full_data.groupby('판시사항')['사건번호'].apply(list).to_dict()
|
38 |
number_to_fulltext = full_data.set_index('사건번호')['전문'].to_dict()
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
return name_to_number, summary_to_number, number_to_fulltext
|
40 |
|
41 |
name_to_number, summary_to_number, number_to_fulltext = load_optimized_dataset(data_files)
|
|
|
110 |
user_mention = message.author.mention
|
111 |
|
112 |
# 유사한 사건명 및 판시사항 찾기
|
113 |
+
matched_case_names = process.extractBests(user_input, all_case_names, limit=3, score_cutoff=70)
|
114 |
matched_case_summaries = process.extractBests(user_input, all_case_summaries, limit=3, score_cutoff=70)
|
115 |
|
116 |
logging.debug(f"Matched case names: {matched_case_names}")
|
|
|
119 |
if matched_case_names:
|
120 |
case_numbers = []
|
121 |
for case_name, score in matched_case_names:
|
122 |
+
case_numbers.extend(name_to_number.get(case_name, []))
|
123 |
+
case_numbers = list(set(case_numbers))
|
124 |
case_numbers_str = "\n".join(case_numbers)
|
125 |
system_message = f"{user_mention}, '{user_input}'와 유사한 사건명의 사건번호는 다음과 같습니다:\n{case_numbers_str}"
|
126 |
elif matched_case_summaries:
|
127 |
case_numbers = []
|
128 |
for case_summary, score in matched_case_summaries:
|
129 |
+
case_numbers.extend(summary_to_number.get(case_summary, []))
|
130 |
+
case_numbers = list(set(case_numbers))
|
131 |
case_numbers_str = "\n".join(case_numbers)
|
132 |
system_message = f"{user_mention}, '{user_input}'와 유사한 판시사항의 사건번호는 다음과 같습니다:\n{case_numbers_str}"
|
133 |
elif user_input in number_to_fulltext:
|