seawolf2357 committed on
Commit
9c071a8
•
1 Parent(s): 2656039

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -38
app.py CHANGED
@@ -6,6 +6,25 @@ import asyncio
6
  import subprocess
7
  from datasets import load_dataset
8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  # 로깅 설정
10
  logging.basicConfig(level=logging.DEBUG, format='%(asctime)s:%(levelname)s:%(name)s: %(message)s', handlers=[logging.StreamHandler()])
11
 
@@ -16,10 +35,6 @@ intents.messages = True
16
  intents.guilds = True
17
  intents.guild_messages = True
18
 
19
- # 데이터셋 로드
20
- data_files = ['train_0.csv', 'train_1.csv', 'train_2.csv', 'train_3.csv', 'train_4.csv', 'train_5.csv']
21
- law_dataset = load_dataset('csv', data_files=data_files)
22
-
23
  # 추론 API 클라이언트 설정
24
  hf_client = InferenceClient("CohereForAI/c4ai-command-r-plus", token=os.getenv("HF_TOKEN"))
25
 
@@ -29,6 +44,9 @@ SPECIFIC_CHANNEL_ID = int(os.getenv("DISCORD_CHANNEL_ID"))
29
  # 대화 히스토리를 저장할 전역 변수
30
  conversation_history = []
31
 
 
 
 
32
  class MyClient(discord.Client):
33
  def __init__(self, *args, **kwargs):
34
  super().__init__(*args, **kwargs)
@@ -48,7 +66,7 @@ class MyClient(discord.Client):
48
  return
49
  self.is_processing = True
50
  try:
51
- response = await self.generate_response(message)
52
  await message.channel.send(response)
53
  finally:
54
  self.is_processing = False
@@ -58,39 +76,38 @@ class MyClient(discord.Client):
58
  isinstance(message.channel, discord.Thread) and message.channel.parent_id == SPECIFIC_CHANNEL_ID
59
  )
60
 
61
- async def generate_response(self, message):
62
- global conversation_history
63
- user_input = message.content
64
- user_mention = message.author.mention
65
- system_message = f"{user_mention}, DISCORD์—์„œ ์‚ฌ์šฉ์ž๋“ค์˜ ์งˆ๋ฌธ์— ๋‹ตํ•˜๋Š” ์–ด์‹œ์Šคํ„ดํŠธ์ž…๋‹ˆ๋‹ค."
66
- answer = self.search_in_dataset(user_input, law_dataset)
67
- full_response_text = system_message + "\n\n" + answer
68
-
69
- if not full_response_text.strip():
70
- full_response_text = "죄송합니다, 정보를 제공할 수 없습니다."
71
-
72
- max_length = 2000
73
- if len(full_response_text) > max_length:
74
- for i in range(0, len(full_response_text), max_length):
75
- part_response = full_response_text[i:i+max_length]
76
- await message.channel.send(part_response)
77
- else:
78
- await message.channel.send(full_response_text)
79
-
80
- logging.debug(f'Full model response sent: {full_response_text}')
81
- conversation_history.append({"role": "assistant", "content": full_response_text})
82
-
83
- def search_in_dataset(self, query, dataset):
84
- # 사용자의 쿼리와 관련된 사건명을 찾아 사건번호를 반환합니다.
85
- response = []
86
- for record in dataset['train']:
87
- # ์‚ฌ๊ฑด๋ช… ํ•„๋“œ๊ฐ€ None์ด ์•„๋‹ ๋•Œ๋งŒ ๊ฒ€์‚ฌ๋ฅผ ์ˆ˜ํ–‰
88
- if record['사건명'] and query in record['사건명']:
89
- detail = f"사건번호: {record['사건번호']}"
90
- response.append(detail)
91
-
92
- return "\n".join(response) if response else "관련 법률 정보를 찾을 수 없습니다."
93
-
94
 
95
  if __name__ == "__main__":
96
  discord_client = MyClient(intents=intents)
 
6
  import subprocess
7
  from datasets import load_dataset
8
 
9
+ # 현재 작업 디렉토리 출력
10
+ print("Current Working Directory:", os.getcwd())
11
+
12
+ # 데이터셋 파일 이름
13
+ data_file = 'train_0.csv'
14
+
15
+ # 현재 작업 디렉토리에 파일이 있는지 확인
16
+ if os.path.exists(data_file):
17
+ print(f"File {data_file} exists in the current directory.")
18
+ else:
19
+ print(f"File {data_file} does not exist in the current directory.")
20
+ # 작업 디렉토리 변경 (필요한 경우)
21
+ os.chdir('/home/user/app')
22
+ print("Changed directory to:", os.getcwd())
23
+
24
+ # 데이터셋 로드
25
+ law_dataset = load_dataset('csv', data_files=data_file)
26
+ print("Dataset loaded successfully.")
27
+
28
  # 로깅 설정
29
  logging.basicConfig(level=logging.DEBUG, format='%(asctime)s:%(levelname)s:%(name)s: %(message)s', handlers=[logging.StreamHandler()])
30
 
 
35
  intents.guilds = True
36
  intents.guild_messages = True
37
 
 
 
 
 
38
  # 추론 API 클라이언트 설정
39
  hf_client = InferenceClient("CohereForAI/c4ai-command-r-plus", token=os.getenv("HF_TOKEN"))
40
 
 
44
  # 대화 히스토리를 저장할 전역 변수
45
  conversation_history = []
46
 
47
+ # 법률 데이터셋 로드
48
+ law_dataset = load_dataset('csv', data_files='train_0.csv')
49
+
50
  class MyClient(discord.Client):
51
  def __init__(self, *args, **kwargs):
52
  super().__init__(*args, **kwargs)
 
66
  return
67
  self.is_processing = True
68
  try:
69
+ response = await generate_response(message)
70
  await message.channel.send(response)
71
  finally:
72
  self.is_processing = False
 
76
  isinstance(message.channel, discord.Thread) and message.channel.parent_id == SPECIFIC_CHANNEL_ID
77
  )
78
 
79
+ async def generate_response(message):
80
+ global conversation_history
81
+ user_input = message.content
82
+ user_mention = message.author.mention
83
+ system_message = f"{user_mention}, DISCORD์—์„œ ์‚ฌ์šฉ์ž๋“ค์˜ ์งˆ๋ฌธ์— ๋‹ตํ•˜๋Š” ์–ด์‹œ์Šคํ„ดํŠธ์ž…๋‹ˆ๋‹ค."
84
+
85
+ # 데이터 검색 및 응답 준비
86
+ answer = search_in_dataset(user_input, law_dataset)
87
+ full_response_text = system_message + "\n\n" + answer
88
+
89
+ # ์‘๋‹ต ๋ถ„ํ•  ์ „์†ก
90
+ max_length = 2000
91
+ if len(full_response_text) > max_length:
92
+ # 너무 긴 메시지를 여러 부분으로 나누어 보냅니다.
93
+ for i in range(0, len(full_response_text), max_length):
94
+ part_response = full_response_text[i:i+max_length]
95
+ await message.channel.send(part_response)
96
+ else:
97
+ # 메시지 길이가 적절하면 한 번에 전송
98
+ await message.channel.send(full_response_text)
99
+
100
+ logging.debug(f'Full model response sent: {full_response_text}')
101
+ conversation_history.append({"role": "assistant", "content": full_response_text})
102
+
103
+
104
+ def search_in_dataset(query, dataset):
105
+ # 간단한 검색 로직을 구현합니다.
106
+ # 여기에서는 예제로 단순화하기 위해 첫 번째 항목을 반환합니다.
107
+ for record in dataset['train']:
108
+ if query in record['사건명']:
109
+ return record['사건번호']
110
+ return "관련 법률 정보를 찾을 수 없습니다."
 
111
 
112
  if __name__ == "__main__":
113
  discord_client = MyClient(intents=intents)