seawolf2357 committed on
Commit
c8cd942
•
1 Parent(s): c0f1be6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -20
app.py CHANGED
@@ -2,9 +2,6 @@ import discord
2
  import logging
3
  import os
4
  from huggingface_hub import InferenceClient
5
- import asyncio
6
- import subprocess
7
- from datasets import load_dataset
8
  import pandas as pd
9
  from fuzzywuzzy import process
10
 
@@ -29,15 +26,17 @@ def load_optimized_dataset(data_files):
29
  data_frames = [pd.read_csv(file) for file in data_files]
30
  full_data = pd.concat(data_frames, ignore_index=True)
31
  # 사건명을 키로 하고 사건번호와 전문을 저장하는 딕셔너리 생성
32
- name_to_number = full_data.groupby('판시사항')['사건번호'].apply(list).to_dict()
 
33
  number_to_fulltext = full_data.set_index('사건번호')['전문'].to_dict()
34
- return name_to_number, number_to_fulltext
35
 
36
- name_to_number, number_to_fulltext = load_optimized_dataset(data_files)
37
  print("Dataset loaded successfully.")
38
 
39
- # 사건명 리스트 생성
40
  all_case_names = list(name_to_number.keys())
 
41
 
42
  # 로깅 설정
43
  logging.basicConfig(level=logging.DEBUG, format='%(asctime)s:%(levelname)s:%(name)s: %(message)s', handlers=[logging.StreamHandler()])
@@ -79,9 +78,10 @@ class MyClient(discord.Client):
79
 
80
  self.is_processing = True
81
  try:
82
- response = await generate_response(message)
83
- if response and response.strip():
84
- await message.channel.send(response)
 
85
  else:
86
  await message.channel.send("죄송합니다, 제공할 수 있는 정보가 없습니다.")
87
  finally:
@@ -98,8 +98,9 @@ async def generate_response(message):
98
  user_input = message.content.strip()
99
  user_mention = message.author.mention
100
 
101
- # 유사한 사건명 찾기
102
  matched_case_names = process.extractBests(user_input, all_case_names, limit=3, score_cutoff=80)
 
103
 
104
  if matched_case_names:
105
  case_numbers = []
@@ -108,6 +109,13 @@ async def generate_response(message):
108
  case_numbers = list(set(case_numbers)) # 중복 제거
109
  case_numbers_str = "\n".join(case_numbers)
110
  system_message = f"{user_mention}, '{user_input}'와 유사한 사건명의 사건번호는 다음과 같습니다:\n{case_numbers_str}"
 
 
 
 
 
 
 
111
  elif user_input in number_to_fulltext:
112
  full_text = number_to_fulltext[user_input]
113
  system_message = f"{user_mention}, 사건번호 '{user_input}'의 전문은 다음과 같습니다:\n\n{full_text}"
@@ -116,15 +124,13 @@ async def generate_response(message):
116
 
117
  # 메시지 길이 제한 처리
118
  max_length = 2000
119
- if len(system_message) > max_length:
120
- response_parts = []
121
- for i in range(0, len(system_message), max_length):
122
- part_response = system_message[i:i + max_length]
123
- await message.channel.send(part_response)
124
- response_parts.append(part_response)
125
- return response_parts[0] if response_parts else "죄송합니다, 제공할 수 있는 정보가 없습니다."
126
- return system_message
127
 
128
  if __name__ == "__main__":
129
  discord_client = MyClient(intents=intents)
130
- discord_client.run(os.getenv('DISCORD_TOKEN'))
 
2
  import logging
3
  import os
4
  from huggingface_hub import InferenceClient
 
 
 
5
  import pandas as pd
6
  from fuzzywuzzy import process
7
 
 
26
  data_frames = [pd.read_csv(file) for file in data_files]
27
  full_data = pd.concat(data_frames, ignore_index=True)
28
  # 사건명을 키로 하고 사건번호와 전문을 저장하는 딕셔너리 생성
29
+ name_to_number = full_data.groupby('사건명')['사건번호'].apply(list).to_dict()
30
+ summary_to_number = full_data.groupby('판시사항')['사건번호'].apply(list).to_dict()
31
  number_to_fulltext = full_data.set_index('사건번호')['전문'].to_dict()
32
+ return name_to_number, summary_to_number, number_to_fulltext
33
 
34
+ name_to_number, summary_to_number, number_to_fulltext = load_optimized_dataset(data_files)
35
  print("Dataset loaded successfully.")
36
 
37
+ # 사건명 및 판시사항 리스트 생성
38
  all_case_names = list(name_to_number.keys())
39
+ all_case_summaries = list(summary_to_number.keys())
40
 
41
  # 로깅 설정
42
  logging.basicConfig(level=logging.DEBUG, format='%(asctime)s:%(levelname)s:%(name)s: %(message)s', handlers=[logging.StreamHandler()])
 
78
 
79
  self.is_processing = True
80
  try:
81
+ response_parts = await generate_response(message)
82
+ if response_parts:
83
+ for part in response_parts:
84
+ await message.channel.send(part)
85
  else:
86
  await message.channel.send("죄송합니다, 제공할 수 있는 정보가 없습니다.")
87
  finally:
 
98
  user_input = message.content.strip()
99
  user_mention = message.author.mention
100
 
101
+ # 유사한 사건명 및 판결요지 찾기
102
  matched_case_names = process.extractBests(user_input, all_case_names, limit=3, score_cutoff=80)
103
+ matched_case_summaries = process.extractBests(user_input, all_case_summaries, limit=3, score_cutoff=80)
104
 
105
  if matched_case_names:
106
  case_numbers = []
 
109
  case_numbers = list(set(case_numbers)) # 중복 제거
110
  case_numbers_str = "\n".join(case_numbers)
111
  system_message = f"{user_mention}, '{user_input}'와 유사한 사건명의 사건번호는 다음과 같습니다:\n{case_numbers_str}"
112
+ elif matched_case_summaries:
113
+ case_numbers = []
114
+ for case_summary, score in matched_case_summaries:
115
+ case_numbers.extend(summary_to_number[case_summary])
116
+ case_numbers = list(set(case_numbers)) # 중복 제거
117
+ case_numbers_str = "\n".join(case_numbers)
118
+ system_message = f"{user_mention}, '{user_input}'와 유사한 판시사항의 사건번호는 다음과 같습니다:\n{case_numbers_str}"
119
  elif user_input in number_to_fulltext:
120
  full_text = number_to_fulltext[user_input]
121
  system_message = f"{user_mention}, 사건번호 '{user_input}'의 전문은 다음과 같습니다:\n\n{full_text}"
 
124
 
125
  # 메시지 길이 제한 처리
126
  max_length = 2000
127
+ response_parts = []
128
+ for i in range(0, len(system_message), max_length):
129
+ part_response = system_message[i:i + max_length]
130
+ response_parts.append(part_response)
131
+
132
+ return response_parts
 
 
133
 
134
  if __name__ == "__main__":
135
  discord_client = MyClient(intents=intents)
136
+ discord_client.run(os.getenv('DISCORD_TOKEN'))