Spaces:
Running
Running
seawolf2357
committed on
Commit
•
c8cd942
1
Parent(s):
c0f1be6
Update app.py
Browse files
app.py
CHANGED
@@ -2,9 +2,6 @@ import discord
|
|
2 |
import logging
|
3 |
import os
|
4 |
from huggingface_hub import InferenceClient
|
5 |
-
import asyncio
|
6 |
-
import subprocess
|
7 |
-
from datasets import load_dataset
|
8 |
import pandas as pd
|
9 |
from fuzzywuzzy import process
|
10 |
|
@@ -29,15 +26,17 @@ def load_optimized_dataset(data_files):
|
|
29 |
data_frames = [pd.read_csv(file) for file in data_files]
|
30 |
full_data = pd.concat(data_frames, ignore_index=True)
|
31 |
# 사건명을 키로 하고 사건번호와 전문을 저장하는 딕셔너리 생성
|
32 |
-
name_to_number = full_data.groupby('
|
|
|
33 |
number_to_fulltext = full_data.set_index('사건번호')['전문'].to_dict()
|
34 |
-
return name_to_number, number_to_fulltext
|
35 |
|
36 |
-
name_to_number, number_to_fulltext = load_optimized_dataset(data_files)
|
37 |
print("Dataset loaded successfully.")
|
38 |
|
39 |
-
# 사건명 리스트 생성
|
40 |
all_case_names = list(name_to_number.keys())
|
|
|
41 |
|
42 |
# 로깅 설정
|
43 |
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s:%(levelname)s:%(name)s: %(message)s', handlers=[logging.StreamHandler()])
|
@@ -79,9 +78,10 @@ class MyClient(discord.Client):
|
|
79 |
|
80 |
self.is_processing = True
|
81 |
try:
|
82 |
-
|
83 |
-
if
|
84 |
-
|
|
|
85 |
else:
|
86 |
await message.channel.send("죄송합니다, 제공할 수 있는 정보가 없습니다.")
|
87 |
finally:
|
@@ -98,8 +98,9 @@ async def generate_response(message):
|
|
98 |
user_input = message.content.strip()
|
99 |
user_mention = message.author.mention
|
100 |
|
101 |
-
# 유사한 사건명 찾기
|
102 |
matched_case_names = process.extractBests(user_input, all_case_names, limit=3, score_cutoff=80)
|
|
|
103 |
|
104 |
if matched_case_names:
|
105 |
case_numbers = []
|
@@ -108,6 +109,13 @@ async def generate_response(message):
|
|
108 |
case_numbers = list(set(case_numbers)) # 중복 제거
|
109 |
case_numbers_str = "\n".join(case_numbers)
|
110 |
system_message = f"{user_mention}, '{user_input}'와 유사한 사건명의 사건번호는 다음과 같습니다:\n{case_numbers_str}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
111 |
elif user_input in number_to_fulltext:
|
112 |
full_text = number_to_fulltext[user_input]
|
113 |
system_message = f"{user_mention}, 사건번호 '{user_input}'의 전문은 다음과 같습니다:\n\n{full_text}"
|
@@ -116,15 +124,13 @@ async def generate_response(message):
|
|
116 |
|
117 |
# 메시지 길이 제한 처리
|
118 |
max_length = 2000
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
return response_parts[0] if response_parts else "죄송합니다, 제공할 수 있는 정보가 없습니다."
|
126 |
-
return system_message
|
127 |
|
128 |
if __name__ == "__main__":
|
129 |
discord_client = MyClient(intents=intents)
|
130 |
-
discord_client.run(os.getenv('DISCORD_TOKEN'))
|
|
|
2 |
import logging
|
3 |
import os
|
4 |
from huggingface_hub import InferenceClient
|
|
|
|
|
|
|
5 |
import pandas as pd
|
6 |
from fuzzywuzzy import process
|
7 |
|
|
|
26 |
data_frames = [pd.read_csv(file) for file in data_files]
|
27 |
full_data = pd.concat(data_frames, ignore_index=True)
|
28 |
# 사건명을 키로 하고 사건번호와 전문을 저장하는 딕셔너리 생성
|
29 |
+
name_to_number = full_data.groupby('사건명')['사건번호'].apply(list).to_dict()
|
30 |
+
summary_to_number = full_data.groupby('판시사항')['사건번호'].apply(list).to_dict()
|
31 |
number_to_fulltext = full_data.set_index('사건번호')['전문'].to_dict()
|
32 |
+
return name_to_number, summary_to_number, number_to_fulltext
|
33 |
|
34 |
+
name_to_number, summary_to_number, number_to_fulltext = load_optimized_dataset(data_files)
|
35 |
print("Dataset loaded successfully.")
|
36 |
|
37 |
+
# 사건명 및 판시사항 리스트 생성
|
38 |
all_case_names = list(name_to_number.keys())
|
39 |
+
all_case_summaries = list(summary_to_number.keys())
|
40 |
|
41 |
# 로깅 설정
|
42 |
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s:%(levelname)s:%(name)s: %(message)s', handlers=[logging.StreamHandler()])
|
|
|
78 |
|
79 |
self.is_processing = True
|
80 |
try:
|
81 |
+
response_parts = await generate_response(message)
|
82 |
+
if response_parts:
|
83 |
+
for part in response_parts:
|
84 |
+
await message.channel.send(part)
|
85 |
else:
|
86 |
await message.channel.send("죄송합니다, 제공할 수 있는 정보가 없습니다.")
|
87 |
finally:
|
|
|
98 |
user_input = message.content.strip()
|
99 |
user_mention = message.author.mention
|
100 |
|
101 |
+
# 유사한 사건명 및 판결요지 찾기
|
102 |
matched_case_names = process.extractBests(user_input, all_case_names, limit=3, score_cutoff=80)
|
103 |
+
matched_case_summaries = process.extractBests(user_input, all_case_summaries, limit=3, score_cutoff=80)
|
104 |
|
105 |
if matched_case_names:
|
106 |
case_numbers = []
|
|
|
109 |
case_numbers = list(set(case_numbers)) # 중복 제거
|
110 |
case_numbers_str = "\n".join(case_numbers)
|
111 |
system_message = f"{user_mention}, '{user_input}'와 유사한 사건명의 사건번호는 다음과 같습니다:\n{case_numbers_str}"
|
112 |
+
elif matched_case_summaries:
|
113 |
+
case_numbers = []
|
114 |
+
for case_summary, score in matched_case_summaries:
|
115 |
+
case_numbers.extend(summary_to_number[case_summary])
|
116 |
+
case_numbers = list(set(case_numbers)) # 중복 제거
|
117 |
+
case_numbers_str = "\n".join(case_numbers)
|
118 |
+
system_message = f"{user_mention}, '{user_input}'와 유사한 판시사항의 사건번호는 다음과 같습니다:\n{case_numbers_str}"
|
119 |
elif user_input in number_to_fulltext:
|
120 |
full_text = number_to_fulltext[user_input]
|
121 |
system_message = f"{user_mention}, 사건번호 '{user_input}'의 전문은 다음과 같습니다:\n\n{full_text}"
|
|
|
124 |
|
125 |
# 메시지 길이 제한 처리
|
126 |
max_length = 2000
|
127 |
+
response_parts = []
|
128 |
+
for i in range(0, len(system_message), max_length):
|
129 |
+
part_response = system_message[i:i + max_length]
|
130 |
+
response_parts.append(part_response)
|
131 |
+
|
132 |
+
return response_parts
|
|
|
|
|
133 |
|
134 |
if __name__ == "__main__":
|
135 |
discord_client = MyClient(intents=intents)
|
136 |
+
discord_client.run(os.getenv('DISCORD_TOKEN'))
|