jerome-white committed on
Commit
e946c57
1 Parent(s): 529dafe

Cleanup resources on Gradio state deletion

Browse files
Files changed (5) hide show
  1. app.py +17 -3
  2. mylib/__init__.py +1 -1
  3. mylib/_chat.py +5 -0
  4. mylib/_files.py +40 -23
  5. tools/cleanup.py +53 -0
app.py CHANGED
@@ -34,6 +34,10 @@ def load():
34
 
35
  return ChatState(database, messenger, chat)
36
 
 
 
 
 
37
  def upload(data, state):
38
  return state.database(data)
39
 
@@ -53,7 +57,10 @@ def prompt(message, history, state):
53
  #
54
  #
55
  with gr.Blocks() as demo:
56
- state = gr.State(load)
 
 
 
57
  with gr.Row():
58
  with gr.Column():
59
  repository = gr.Textbox(
@@ -67,7 +74,10 @@ with gr.Blocks() as demo:
67
  )
68
  data.upload(
69
  fn=upload,
70
- inputs=[data, state],
 
 
 
71
  outputs=repository,
72
  )
73
 
@@ -76,7 +86,11 @@ with gr.Blocks() as demo:
76
  interaction = gr.Textbox()
77
  interaction.submit(
78
  fn=prompt,
79
- inputs=[interaction, chatbot, state],
 
 
 
 
80
  outputs=chatbot,
81
  )
82
 
 
34
 
35
  return ChatState(database, messenger, chat)
36
 
37
def eject(state):
    """Release the remote resources held by a session's state.

    Registered as the ``delete_callback`` of the ``gr.State`` component, so
    it receives the object produced by ``load``.

    Args:
        state: Object exposing ``database`` and ``chat`` attributes, each
            providing a ``cleanup()`` method.
    """
    state.database.cleanup()
    # Fixed: this line previously read ``stat.chat.cleanup()``, which raised
    # NameError and left the chat's resources behind.
    state.chat.cleanup()
40
+
41
  def upload(data, state):
42
  return state.database(data)
43
 
 
57
  #
58
  #
59
  with gr.Blocks() as demo:
60
+ state = gr.State(
61
+ value=load,
62
+ delete_callback=eject,
63
+ )
64
  with gr.Row():
65
  with gr.Column():
66
  repository = gr.Textbox(
 
74
  )
75
  data.upload(
76
  fn=upload,
77
+ inputs=[
78
+ data,
79
+ state,
80
+ ],
81
  outputs=repository,
82
  )
83
 
 
86
  interaction = gr.Textbox()
87
  interaction.submit(
88
  fn=prompt,
89
+ inputs=[
90
+ interaction,
91
+ chatbot,
92
+ state,
93
+ ],
94
  outputs=chatbot,
95
  )
96
 
mylib/__init__.py CHANGED
@@ -1,5 +1,5 @@
1
  from ._chat import ChatController
2
- from ._files import FileManager
3
  from ._errors import ErrorLogger
4
  from ._logging import Logger
5
  from ._message import MessageHandler
 
1
  from ._chat import ChatController
2
+ from ._files import FileManager, VectorStoreManager
3
  from ._errors import ErrorLogger
4
  from ._logging import Logger
5
  from ._message import MessageHandler
mylib/_chat.py CHANGED
@@ -61,6 +61,11 @@ class ChatController:
61
 
62
  return self.send(prompt)
63
 
 
 
 
 
 
64
  def send(self, content):
65
  self.client.beta.threads.messages.create(
66
  self.thread.id,
 
61
 
62
  return self.send(prompt)
63
 
64
def cleanup(self):
    """Delete the remote thread and assistant, then clear ``attached``."""
    beta = self.client.beta

    thread_id = self.thread.id
    beta.threads.delete(thread_id)

    assistant_id = self.assistant.id
    beta.assistants.delete(assistant_id)

    self.attached = False
68
+
69
  def send(self, content):
70
  self.client.beta.threads.messages.create(
71
  self.thread.id,
mylib/_files.py CHANGED
@@ -50,39 +50,44 @@ class FileStream:
50
  s.close()
51
  self.streams.clear()
52
 
53
- class FileManager:
54
- def __init__(self, client, prefix, batch_size=20):
55
  self.client = client
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  self.prefix = prefix
57
  self.batch_size = batch_size
58
-
59
  self.storage = set()
60
- self.vector_store_id = None
61
 
62
  def __bool__(self):
63
  return self.vector_store_id is not None
64
 
65
- def __iter__(self):
66
- if self:
67
- kwargs = {}
68
- while True:
69
- vs_files = self.client.beta.vector_stores.files.list(
70
- vector_store_id=self.vector_store_id,
71
- **kwargs,
72
- )
73
- for f in vs_files.data:
74
- result = self.client.files.retrieve(f.id)
75
- yield result.filename
76
-
77
- if not vs_files.has_more:
78
- break
79
- kwargs['after'] = vs_files.after
80
-
81
  def __call__(self, paths):
82
  files = []
83
  self.test_and_setup()
84
 
85
- for p in self.ls(paths):
86
  with FileStream(p) as stream:
87
  for s in stream:
88
  if s.checksum not in self.storage:
@@ -92,7 +97,19 @@ class FileManager:
92
  self.put(files)
93
  files.clear()
94
 
95
- return '\n'.join(self)
 
 
 
 
 
 
 
 
 
 
 
 
96
 
97
  def test_and_setup(self):
98
  if self:
@@ -105,7 +122,7 @@ class FileManager:
105
  )
106
  self.vector_store_id = vector_store.id
107
 
108
- def ls(self, paths):
109
  left = 0
110
  while left < len(paths):
111
  right = left + self.batch_size
 
50
  s.close()
51
  self.streams.clear()
52
 
53
class VectorStoreManager:
    """Enumerate and delete the files attached to a vector store.

    Args:
        client: API client exposing ``beta.vector_stores`` and ``files``.
        vector_store_id: Identifier of the vector store to operate on.
    """

    def __init__(self, client, vector_store_id):
        self.client = client
        self.vector_store_id = vector_store_id

    def __iter__(self):
        """Yield the id of every file in the vector store, page by page."""
        # Fixed: ``kwargs`` was never defined (NameError) and the cursor was
        # never advanced, so a multi-page listing looped forever.
        kwargs = {}
        while True:
            vs_files = self.client.beta.vector_stores.files.list(
                vector_store_id=self.vector_store_id,
                **kwargs,
            )
            for f in vs_files.data:
                yield f.id

            # An empty page cannot supply a cursor; stop rather than spin.
            if not vs_files.has_more or not vs_files.data:
                break
            kwargs['after'] = vs_files.data[-1].id

    def cleanup(self):
        """Delete every file in the vector store, then the store itself."""
        # Snapshot the ids first so deletions cannot disturb the paging
        # cursor used by ``__iter__``.
        for i in list(self):
            self.client.files.delete(i)
        self.client.beta.vector_stores.delete(self.vector_store_id)
74
+
75
+ class FileManager(VectorStoreManager):
76
def __init__(self, client, prefix, batch_size=20):
    """Set up a manager that starts without a vector store.

    Args:
        client: API client handed to the parent class.
        prefix: Stored as ``self.prefix``.
        batch_size: Stored as ``self.batch_size``; defaults to 20.
    """
    # ``None`` is passed as the vector store id at construction time.
    super().__init__(client, None)

    self.storage = set()
    self.batch_size = batch_size
    self.prefix = prefix
 
82
 
83
  def __bool__(self):
84
  return self.vector_store_id is not None
85
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
  def __call__(self, paths):
87
  files = []
88
  self.test_and_setup()
89
 
90
+ for p in self.each(paths):
91
  with FileStream(p) as stream:
92
  for s in stream:
93
  if s.checksum not in self.storage:
 
97
  self.put(files)
98
  files.clear()
99
 
100
+ return '\n'.join(self.ls())
101
+
102
def ls(self):
    """Yield the filename of every file in the vector store.

    Iterating ``self`` yields file ids; each id is resolved to its file
    record through ``client.files.retrieve``.
    """
    for i in self:
        # Fixed: the retrieved record was discarded and ``.filename`` was
        # read from the id string, which raised AttributeError.
        result = self.client.files.retrieve(i)
        yield result.filename
106
+
107
def cleanup(self):
    """Delete the vector store (if any) and reset local bookkeeping.

    After this call ``self.storage`` is empty and ``self.vector_store_id``
    is ``None``.
    """
    # Fixed: the check used to be ``self.storage``, so a vector store that
    # existed but had no recorded files was never deleted. Key on the
    # store's existence instead, without relying on ``assert`` (stripped
    # under ``-O``).
    if self.vector_store_id is not None:
        super().cleanup()
    self.storage.clear()
    self.vector_store_id = None
113
 
114
  def test_and_setup(self):
115
  if self:
 
122
  )
123
  self.vector_store_id = vector_store.id
124
 
125
+ def each(self, paths):
126
  left = 0
127
  while left < len(paths):
128
  right = left + self.batch_size
tools/cleanup.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ from argparse import ArgumentParser
3
+
4
+ from scipy import constants
5
+ from openai import OpenAI
6
+
7
+ from mylib import Logger, VectorStoreManager
8
+
9
class AgeCheck:
    """Base class for age filters used through the ``in`` operator.

    ``timestamp in check`` is true when the item created at ``timestamp``
    (seconds since the epoch) should be skipped by the cleanup loop.
    """

    def __init__(self):
        # Reference time captured once, so every comparison in a run uses
        # the same "now".
        self.now = time.time()

    def __contains__(self, other):
        raise NotImplementedError()


class NoAgeCheck(AgeCheck):
    """Filter that never skips anything."""

    def __contains__(self, other):
        return False


class HourAgeCheck(AgeCheck):
    """Filter that skips items younger than a number of hours.

    Args:
        hours: Age limit in hours; items whose age is strictly below it
            are reported as contained.
    """

    def __init__(self, hours):
        super().__init__()
        self.limit = hours

    def __contains__(self, other):
        # Fixed: age was computed as ``(other - now) * constants.hour``,
        # i.e. negative and scaled the wrong way, so every timestamp
        # compared as younger than the limit. Age in hours is the elapsed
        # seconds divided by seconds-per-hour.
        age = (self.now - other) / constants.hour
        return age < self.limit
28
+
29
if __name__ == '__main__':
    # Delete assistants, along with the vector stores and files attached
    # to them. With --max-age-hours, assistants younger than that many
    # hours are left alone.
    arguments = ArgumentParser()
    arguments.add_argument('--max-age-hours', type=int)
    args = arguments.parse_args()

    client = OpenAI()
    if args.max_age_hours is None:
        acheck = NoAgeCheck()
    else:
        acheck = HourAgeCheck(args.max_age_hours)

    # Fixed: the previous ``while True`` re-requested the first page and
    # never terminated while ``has_more`` stayed true. Iterating the list
    # result walks every page; snapshotting it first keeps deletions from
    # disturbing the paging cursor.
    assistants = list(client.beta.assistants.list())
    for a in assistants:
        if a.created_at in acheck:
            continue
        # Guard against assistants that have no tool resources at all.
        resources = a.tool_resources
        if resources is not None and resources.file_search is not None:
            for i in resources.file_search.vector_store_ids:
                Logger.warning(f'{a.id} {i}')
                # Fixed: the client argument was missing (TypeError).
                vsm = VectorStoreManager(client, i)
                vsm.cleanup()
        client.beta.assistants.delete(a.id)