thryyyyy committed on
Commit
96d947e
·
1 Parent(s): 2ce826b

in memory backup

Browse files
Files changed (2) hide show
  1. backend/scripts/backup.py +76 -88
  2. backend/start.sh +0 -1
backend/scripts/backup.py CHANGED
@@ -5,42 +5,26 @@ import subprocess
5
  import datetime
6
  import sqlite3
7
  from pathlib import Path
8
- from huggingface_hub import HfApi, hf_hub_download, CommitOperationAdd
 
 
9
 
10
  # Set up path to include the application module
11
  SCRIPT_DIR = Path(__file__).parent.resolve()
12
  BACKEND_DIR = SCRIPT_DIR.parent
13
  sys.path.append(str(BACKEND_DIR))
14
 
15
- # Database paths
16
  DATA_DIR = os.environ.get("DATA_DIR", "/app/backend/data")
17
  DB_FILE_PATH = os.path.join(DATA_DIR, "webui.db")
18
 
19
- # Backup paths
20
- BACKUP_DIR = os.environ.get("BACKUP_DIR", "/tmp/open_webui/db_backup")
21
- TIMESTAMP_FILE_PATH = os.path.join(BACKUP_DIR, "last_backup_time.txt")
22
- DB_GPG_PATH = os.path.join(BACKUP_DIR, "webui.db.gpg")
23
-
24
- # Hugging Face repo paths
25
- REPO_TIMESTAMP_FILE = "db_backup/last_backup_time.txt"
26
  REPO_DB_GPG_FILE = "db_backup/webui.db.gpg"
27
-
28
-
29
- def ensure_directories():
30
- try:
31
- os.makedirs(BACKUP_DIR, mode=0o755, exist_ok=True)
32
- test_file = os.path.join(BACKUP_DIR, '.write_test')
33
- with open(test_file, 'w') as f:
34
- f.write('test')
35
- os.remove(test_file)
36
- print(f"Successfully verified write access to {BACKUP_DIR}")
37
- return True
38
- except Exception as e:
39
- print(f"Error creating/verifying backup directory: {e}")
40
- return False
41
 
42
 
43
  def verify_database():
 
44
  if not os.path.exists(DB_FILE_PATH):
45
  print(f"Database file not found at: {DB_FILE_PATH}")
46
  return False
@@ -56,46 +40,55 @@ def verify_database():
56
  if result.lower() == "ok" and len(tables) > 0:
57
  print(f"Database verified: {len(tables)} tables found")
58
  return True
59
- else:
60
- print("Database verification failed")
61
- return False
62
  except Exception as e:
63
  print(f"Database verification error: {e}")
64
  return False
65
 
66
 
67
- def encrypt_database(passphrase):
 
68
  try:
69
- os.makedirs('/root/.gnupg', mode=0o700, exist_ok=True)
70
-
71
- encrypt_cmd = [
72
- "gpg",
73
- "--batch",
74
- "--yes",
75
- "--passphrase", passphrase,
76
- "--pinentry-mode", "loopback",
77
- "-c",
78
- "--cipher-algo", "AES256",
79
- "-o", DB_GPG_PATH,
80
- DB_FILE_PATH
81
- ]
82
-
83
- result = subprocess.run(encrypt_cmd, capture_output=True, text=True)
84
-
85
- if result.returncode != 0:
86
- print(f"GPG encryption failed: {result.stderr}")
87
- return False
88
-
89
- if os.path.exists(DB_GPG_PATH):
90
- print(f"Encryption successful. File size: {os.path.getsize(DB_GPG_PATH):,} bytes")
91
- return True
92
- return False
 
 
 
 
 
 
 
 
93
  except Exception as e:
94
  print(f"Encryption error: {e}")
95
- return False
96
 
97
 
98
  def get_last_backup_time(repo_id, hf_token):
 
99
  try:
100
  api = HfApi()
101
  files = api.list_repo_files(repo_id=repo_id, repo_type="space", token=hf_token)
@@ -116,33 +109,18 @@ def get_last_backup_time(repo_id, hf_token):
116
  return None
117
 
118
 
119
- def save_timestamp_locally():
120
- try:
121
- now = datetime.datetime.now(datetime.timezone.utc)
122
- os.makedirs(os.path.dirname(TIMESTAMP_FILE_PATH), exist_ok=True)
123
-
124
- temp_path = f"{TIMESTAMP_FILE_PATH}.tmp"
125
- with open(temp_path, "w", encoding="utf-8") as f:
126
- f.write(now.isoformat())
127
- os.replace(temp_path, TIMESTAMP_FILE_PATH)
128
- return True
129
- except Exception as e:
130
- print(f"Error saving timestamp: {e}")
131
- return False
132
-
133
-
134
  def backup_db():
 
 
135
  passphrase = os.environ.get("BACKUP_PASSPHRASE")
136
  hf_token = os.environ.get("HF_TOKEN")
137
  space_id = os.environ.get("SPACE_ID")
138
 
139
  if not all([passphrase, hf_token, space_id]):
140
- print("Error: Missing required environment variables (BACKUP_PASSPHRASE, HF_TOKEN, SPACE_ID)")
141
  return False
142
-
143
- if not ensure_directories():
144
- return False
145
-
146
  threshold_minutes = int(os.environ.get("BACKUP_THRESHOLD_MINUTES", 120))
147
  if threshold_minutes > 0:
148
  last_backup_dt = get_last_backup_time(space_id, hf_token)
@@ -154,33 +132,43 @@ def backup_db():
154
  if elapsed.total_seconds() < threshold_minutes * 60:
155
  print(f"Last backup was {elapsed.total_seconds()/60:.1f} min ago (threshold: {threshold_minutes})")
156
  return True
157
-
 
158
  if not verify_database():
159
  return False
160
-
161
- if not encrypt_database(passphrase):
 
 
 
162
  return False
163
-
164
- if not save_timestamp_locally():
165
- print("Warning: Failed to save timestamp")
166
-
 
 
 
167
  try:
168
  api = HfApi()
169
- operations = [
170
- CommitOperationAdd(path_in_repo=REPO_DB_GPG_FILE, path_or_fileobj=DB_GPG_PATH),
171
- CommitOperationAdd(path_in_repo=REPO_TIMESTAMP_FILE, path_or_fileobj=TIMESTAMP_FILE_PATH)
172
- ]
173
- api.create_commit(
 
 
 
 
 
174
  repo_id=space_id,
175
  repo_type="space",
176
- operations=operations,
177
- commit_message="Update encrypted database backup + timestamp",
178
  token=hf_token
179
  )
180
- print("Backup files uploaded successfully!")
181
  return True
182
  except Exception as e:
183
- print(f"Error uploading to HF: {e}")
184
  return False
185
 
186
 
 
5
  import datetime
6
  import sqlite3
7
  from pathlib import Path
8
+ from io import BytesIO
9
+ from huggingface_hub import HfApi, HfFileSystem, hf_hub_download
10
+ import tempfile
11
 
12
  # Set up path to include the application module
13
  SCRIPT_DIR = Path(__file__).parent.resolve()
14
  BACKEND_DIR = SCRIPT_DIR.parent
15
  sys.path.append(str(BACKEND_DIR))
16
 
17
+ # Database path (actual application database)
18
  DATA_DIR = os.environ.get("DATA_DIR", "/app/backend/data")
19
  DB_FILE_PATH = os.path.join(DATA_DIR, "webui.db")
20
 
21
+ # Hugging Face repo paths (virtual paths in HF storage)
 
 
 
 
 
 
22
  REPO_DB_GPG_FILE = "db_backup/webui.db.gpg"
23
+ REPO_TIMESTAMP_FILE = "db_backup/last_backup_time.txt"
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
 
26
  def verify_database():
27
+ """Verify database integrity."""
28
  if not os.path.exists(DB_FILE_PATH):
29
  print(f"Database file not found at: {DB_FILE_PATH}")
30
  return False
 
40
  if result.lower() == "ok" and len(tables) > 0:
41
  print(f"Database verified: {len(tables)} tables found")
42
  return True
43
+ print("Database verification failed")
44
+ return False
 
45
  except Exception as e:
46
  print(f"Database verification error: {e}")
47
  return False
48
 
49
 
50
def encrypt_database_to_memory(passphrase):
    """Encrypt the database file with GPG and return the ciphertext.

    Runs ``gpg -c`` (symmetric, AES256) on ``DB_FILE_PATH`` with a throwaway
    ``--homedir`` and captures the ciphertext from gpg's stdout.

    Args:
        passphrase: Symmetric passphrase. It is handed to gpg on stdin
            (``--passphrase-fd 0``) instead of the command line, so it is
            not exposed through the process list.

    Returns:
        The encrypted database as ``bytes``, or ``None`` when gpg cannot be
        started, exits with a non-zero status, or produces no output.
    """
    try:
        # Create a secure temporary directory for GPG
        with tempfile.TemporaryDirectory(prefix='gpg_home_') as gpg_home:
            os.chmod(gpg_home, 0o700)

            # Same byte encoding subprocess would apply to an argv entry, so
            # the derived key matches what `--passphrase <value>` produced.
            secret = os.fsencode(passphrase)

            if b"\n" in secret:
                # --passphrase-fd only reads the first line; a passphrase
                # with an embedded newline would silently yield a different
                # key, so keep the argv form for that unusual case.
                passphrase_args = ["--passphrase", passphrase]
                stdin_data = None
            else:
                passphrase_args = ["--passphrase-fd", "0"]
                stdin_data = secret + b"\n"

            encrypt_cmd = [
                "gpg",
                "--batch",
                "--yes",
                "--homedir", gpg_home,
                *passphrase_args,
                "--pinentry-mode", "loopback",
                "-c",
                "--cipher-algo", "AES256",
                "-o", "-",  # Output to stdout
                DB_FILE_PATH,
            ]

            # check=True turns any non-zero exit into CalledProcessError, so
            # no separate returncode test is needed afterwards.
            result = subprocess.run(
                encrypt_cmd,
                input=stdin_data,
                capture_output=True,
                check=True,
            )

        if not result.stdout:
            # An empty "backup" must not be reported as a success.
            print("GPG encryption failed: no output produced")
            return None

        return result.stdout
    except subprocess.CalledProcessError as e:
        # errors="replace": a decode failure raised inside this handler
        # would otherwise escape the function.
        stderr_text = (e.stderr or b"").decode(errors="replace")
        print(f"GPG process error: {stderr_text}")
        return None
    except Exception as e:
        print(f"Encryption error: {e}")
        return None
88
 
89
 
90
  def get_last_backup_time(repo_id, hf_token):
91
+ """Get timestamp of last backup from HuggingFace."""
92
  try:
93
  api = HfApi()
94
  files = api.list_repo_files(repo_id=repo_id, repo_type="space", token=hf_token)
 
109
  return None
110
 
111
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  def backup_db():
113
+ """Main backup function using streaming approach."""
114
+ # Validate environment
115
  passphrase = os.environ.get("BACKUP_PASSPHRASE")
116
  hf_token = os.environ.get("HF_TOKEN")
117
  space_id = os.environ.get("SPACE_ID")
118
 
119
  if not all([passphrase, hf_token, space_id]):
120
+ print("Error: Missing required environment variables")
121
  return False
122
+
123
+ # Check backup threshold
 
 
124
  threshold_minutes = int(os.environ.get("BACKUP_THRESHOLD_MINUTES", 120))
125
  if threshold_minutes > 0:
126
  last_backup_dt = get_last_backup_time(space_id, hf_token)
 
132
  if elapsed.total_seconds() < threshold_minutes * 60:
133
  print(f"Last backup was {elapsed.total_seconds()/60:.1f} min ago (threshold: {threshold_minutes})")
134
  return True
135
+
136
+ # Verify database before backup
137
  if not verify_database():
138
  return False
139
+
140
+ # Encrypt database to memory
141
+ print("Encrypting database...")
142
+ encrypted_data = encrypt_database_to_memory(passphrase)
143
+ if encrypted_data is None:
144
  return False
145
+ print(f"Database encrypted successfully: {len(encrypted_data)} bytes")
146
+
147
+ # Generate timestamp
148
+ timestamp = datetime.datetime.now(datetime.timezone.utc).isoformat()
149
+ timestamp_bytes = timestamp.encode('utf-8')
150
+
151
+ # Upload both files to HuggingFace
152
  try:
153
  api = HfApi()
154
+ api.upload_file(
155
+ path_or_fileobj=BytesIO(encrypted_data),
156
+ path_in_repo=REPO_DB_GPG_FILE,
157
+ repo_id=space_id,
158
+ repo_type="space",
159
+ token=hf_token
160
+ )
161
+ api.upload_file(
162
+ path_or_fileobj=BytesIO(timestamp_bytes),
163
+ path_in_repo=REPO_TIMESTAMP_FILE,
164
  repo_id=space_id,
165
  repo_type="space",
 
 
166
  token=hf_token
167
  )
168
+ print("Backup completed successfully!")
169
  return True
170
  except Exception as e:
171
+ print(f"Error uploading to HuggingFace: {e}")
172
  return False
173
 
174
 
backend/start.sh CHANGED
@@ -5,7 +5,6 @@ SCRIPT_DIR=$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")
5
  cd "$SCRIPT_DIR" || exit 1
6
 
7
  export DATA_DIR="/app/backend/data"
8
- export BACKUP_DIR="/tmp/open_webui/db_backup"
9
  export PYTHONPATH="/app/backend:${PYTHONPATH}"
10
 
11
  # Validate required environment variables
 
5
  cd "$SCRIPT_DIR" || exit 1
6
 
7
  export DATA_DIR="/app/backend/data"
 
8
  export PYTHONPATH="/app/backend:${PYTHONPATH}"
9
 
10
  # Validate required environment variables