yangwang committed on
Commit
93a8901
·
1 Parent(s): ecdc083

[feat] support google sheets

Browse files
Files changed (2) hide show
  1. main.py +245 -111
  2. requirements.txt +3 -1
main.py CHANGED
@@ -1,13 +1,26 @@
1
- from fastapi import FastAPI, HTTPException, status
2
- from fastapi.middleware.cors import CORSMiddleware
3
- from fastapi.staticfiles import StaticFiles
4
- from pydantic import BaseModel
5
- from typing import List, Dict, Optional
6
- import hashlib
7
- import csv
8
  import os
 
 
 
 
9
  from datetime import datetime
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  app = FastAPI(title="Data Samples API")
12
 
13
  # Add CORS middleware to allow web requests
@@ -32,8 +45,47 @@ class SampleResponse(BaseModel):
32
  modified_datetime: str
33
 
34
 
35
- # File path for TSV storage
36
- SAMPLES_FILE = "samples.tsv"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
 
39
  # Helper function to generate ID from text
@@ -47,137 +99,219 @@ def get_current_datetime() -> str:
47
  return datetime.now().isoformat()
48
 
49
 
50
- # Helper functions for TSV operations
51
- def load_samples_from_tsv():
52
- samples = {}
53
- if os.path.exists(SAMPLES_FILE):
54
- with open(SAMPLES_FILE, 'r') as file:
55
- reader = csv.reader(file, delimiter='\t')
56
- for row in reader:
57
- if len(row) >= 4: # id, text, created_datetime, modified_datetime
58
- sample_id = row[0]
59
- samples[sample_id] = SampleResponse(
60
- id=sample_id,
61
- text=row[1],
62
- created_datetime=row[2],
63
- modified_datetime=row[3]
64
- )
65
- return samples
66
-
67
-
68
- def save_samples_to_tsv(samples):
69
- with open(SAMPLES_FILE, 'w', newline='') as file:
70
- writer = csv.writer(file, delimiter='\t')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  for sample in samples.values():
72
- writer.writerow([
73
- sample.id,
74
- sample.text,
75
- sample.created_datetime,
76
- sample.modified_datetime
77
- ])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
 
79
 
80
- # Get samples from TSV file
81
  def get_samples():
82
- return load_samples_from_tsv()
83
 
84
 
85
  @app.post("/samples/", response_model=SampleResponse, status_code=status.HTTP_201_CREATED)
86
  async def add_sample(sample: Sample):
87
  """Add a new data sample"""
88
- # Load current samples
89
- samples = get_samples()
90
-
91
- # Generate ID based on text content
92
- sample_id = generate_id_from_text(sample.text)
93
-
94
- # Get current datetime
95
- current_time = get_current_datetime()
96
-
97
- # Create a sample response with ID, text, and timestamps
98
- sample_response = SampleResponse(
99
- id=sample_id,
100
- text=sample.text,
101
- created_datetime=current_time,
102
- modified_datetime=current_time
103
- )
104
-
105
- # Store the sample
106
- samples[sample_id] = sample_response
107
-
108
- # Save to TSV
109
- save_samples_to_tsv(samples)
110
-
111
- return sample_response
 
 
 
 
 
112
 
113
 
114
  @app.get("/samples/", response_model=List[SampleResponse])
115
  async def get_all_samples():
116
  """Get all data samples"""
117
- samples = get_samples()
118
- return list(samples.values())
 
 
 
 
 
 
119
 
120
 
121
  @app.get("/samples/{sample_id}", response_model=SampleResponse)
122
  async def get_sample(sample_id: str):
123
  """Get a specific data sample by ID"""
124
- samples = get_samples()
125
- if sample_id not in samples:
126
- raise HTTPException(status_code=404, detail="Sample not found")
127
- return samples[sample_id]
 
 
 
 
 
 
 
128
 
129
 
130
  @app.put("/samples/{sample_id}", response_model=SampleResponse)
131
  async def update_sample(sample_id: str, updated_sample: Sample):
132
  """Update an existing data sample"""
133
- samples = get_samples()
134
- if sample_id not in samples:
135
- raise HTTPException(status_code=404, detail="Sample not found")
136
-
137
- # Get the existing sample to preserve created_datetime
138
- existing_sample = samples[sample_id]
139
-
140
- # Get current datetime for modified_datetime
141
- current_time = get_current_datetime()
142
-
143
- # Create updated sample with original ID and updated timestamps
144
- updated_sample_response = SampleResponse(
145
- id=sample_id,
146
- text=updated_sample.text,
147
- created_datetime=existing_sample.created_datetime,
148
- modified_datetime=current_time
149
- )
150
-
151
- # Store the updated sample
152
- samples[sample_id] = updated_sample_response
153
-
154
- # Save to TSV
155
- save_samples_to_tsv(samples)
156
-
157
- return updated_sample_response
 
 
 
 
 
 
 
 
158
 
159
 
160
  @app.delete("/samples/{sample_id}", status_code=status.HTTP_204_NO_CONTENT)
161
  async def delete_sample(sample_id: str):
162
  """Delete a data sample"""
163
- samples = get_samples()
164
- if sample_id not in samples:
165
- raise HTTPException(status_code=404, detail="Sample not found")
166
-
167
- # Remove the sample
168
- del samples[sample_id]
169
-
170
- # Save to TSV
171
- save_samples_to_tsv(samples)
172
-
173
- return None
174
-
175
-
176
- # Create empty TSV file if it doesn't exist
177
- if not os.path.exists(SAMPLES_FILE):
178
- save_samples_to_tsv({})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
179
 
180
  # Mount the static files directory at the end after defining all API routes
181
- # This serves the index.html file at root (/) while preserving API routes
182
- # app.mount("/", StaticFiles(directory=".", html=True), name="static")
183
  app.mount("/", StaticFiles(directory="static", html=True), name="static")
 
 
 
 
 
 
 
 
1
  import os
2
+ import json
3
+ import hashlib
4
+ import logging
5
+ import tempfile
6
  from datetime import datetime
7
 
8
+ import pygsheets
9
+ import pandas as pd
10
+ from typing import List
11
+ from pydantic import BaseModel
12
+
13
+ from fastapi import FastAPI, HTTPException, status, Request
14
+ from fastapi.middleware.cors import CORSMiddleware
15
+ from fastapi.staticfiles import StaticFiles
16
+ from fastapi.responses import JSONResponse
17
+
18
+ # Set up logging
19
+ logging.basicConfig(
20
+ level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
21
+ )
22
+ logger = logging.getLogger(__name__)
23
+
24
  app = FastAPI(title="Data Samples API")
25
 
26
  # Add CORS middleware to allow web requests
 
45
  modified_datetime: str
46
 
47
 
48
+ # Catch-all exception handler (covers Google Sheets failures and any other unhandled error)
49
@app.exception_handler(Exception)
async def general_exception_handler(request: Request, exc: Exception):
    """Convert any otherwise-unhandled exception into a JSON 500 response.

    Args:
        request: The request that was being processed when ``exc`` escaped.
        exc: The exception no more specific handler dealt with.

    Returns:
        A ``JSONResponse`` with status 500 whose ``detail`` embeds ``str(exc)``.
    """
    # Pass the exception itself as exc_info so the traceback is logged too;
    # the message alone is rarely enough to locate the failing call.
    logger.error(f"Unhandled exception: {exc}", exc_info=exc)
    # NOTE(review): the raw exception text is echoed to the client. Consider
    # returning a generic message if this API is exposed publicly.
    return JSONResponse(
        status_code=500,
        content={"detail": f"Internal server error: {str(exc)}"},
    )
56
+
57
+
58
+ # Google Sheets setup - using environment variables for credentials
59
def get_worksheet():
    """Authorize against Google Sheets and return the first worksheet.

    Credentials come from the ``GOOGLE_SERVICE_JSON_CONTENT`` environment
    variable when it is set (the service-account JSON as a string); otherwise
    the local ``drivelology-1b65510988e8.json`` key file is used.

    Returns:
        The first worksheet of the spreadsheet named ``drivelology``.

    Raises:
        HTTPException: 500 when credentials cannot be parsed, authorization
            fails, or the spreadsheet cannot be opened.
    """
    try:
        raw_credentials = os.environ.get("GOOGLE_SERVICE_JSON_CONTENT")
        if raw_credentials is None:
            # Local development - use the JSON key file directly
            gc = pygsheets.authorize(service_file='drivelology-1b65510988e8.json')
            return gc.open('drivelology')[0]

        # Parse first so malformed JSON fails before anything touches disk
        service_account_info = json.loads(raw_credentials)

        temp_filepath = None
        try:
            # pygsheets is given a file path here, so the credentials are
            # materialised in a temporary file. The write happens inside the
            # try/finally so a failed write cannot leave secrets behind.
            with tempfile.NamedTemporaryFile(mode='w+', delete=False, suffix='.json') as temp_file:
                temp_filepath = temp_file.name
                json.dump(service_account_info, temp_file)

            gc = pygsheets.authorize(service_file=temp_filepath)
            return gc.open('drivelology')[0]
        finally:
            # Always remove the credentials file, whatever happened above
            if temp_filepath is not None and os.path.exists(temp_filepath):
                os.unlink(temp_filepath)
    except Exception as e:
        logger.error(f"Error accessing Google Sheets: {e}", exc_info=True)
        raise HTTPException(
            status_code=500, detail=f"Failed to access Google Sheets: {str(e)}"
        ) from e
89
 
90
 
91
  # Helper function to generate ID from text
 
99
  return datetime.now().isoformat()
100
 
101
 
102
+ # Helper functions for Google Sheets operations
103
def load_samples_from_sheets():
    """Read every sample row from the worksheet, keyed by sample ID.

    Rows without an ``id``/``text`` column or with a blank ID are skipped.
    A blank or missing ``created_datetime`` falls back to the current time,
    and a blank or missing ``modified_datetime`` falls back to the created
    time.

    Returns:
        dict mapping sample ID (``str``) to ``SampleResponse``.

    Raises:
        HTTPException: 500 when the sheet cannot be read or a row cannot be
            turned into a ``SampleResponse``. Failing loudly matters here:
            the write path clears the sheet and rewrites it from this
            function's result, so returning an empty dict on a transient
            error would wipe all stored samples on the next write.
    """
    try:
        worksheet = get_worksheet()
        # Keep cell values as the strings that were stored. With the default
        # numericising, an all-digit ID or text comes back as int/float,
        # which no longer matches the string ID used in URLs.
        records = worksheet.get_all_records(numericise_data=False)

        # Debug log
        logger.info(f"Retrieved {len(records)} records from Google Sheets")

        samples = {}
        for record in records:
            if not record or 'id' not in record or 'text' not in record:
                continue

            sample_id = str(record['id'])
            if not sample_id:
                # Blank rows carry no addressable sample
                continue

            # `or` (not a .get default) because an empty cell is present
            # in the record as '' and would bypass a .get default.
            created_time = str(record.get('created_datetime') or get_current_datetime())
            modified_time = str(record.get('modified_datetime') or created_time)

            samples[sample_id] = SampleResponse(
                id=sample_id,
                text=str(record['text']),
                created_datetime=created_time,
                modified_datetime=modified_time,
            )

        logger.info(f"Processed {len(samples)} valid samples")
        return samples
    except HTTPException:
        # Already a well-formed API error (e.g. from get_worksheet)
        raise
    except Exception as e:
        logger.error(f"Error loading samples from sheets: {e}", exc_info=True)
        raise HTTPException(
            status_code=500, detail=f"Failed to load data: {str(e)}"
        ) from e
132
+
133
+
134
def save_samples_to_sheets(samples):
    """Replace the worksheet contents with the given samples.

    Args:
        samples: dict mapping sample ID to an object exposing ``id``,
            ``text``, ``created_datetime`` and ``modified_datetime``.

    Returns:
        ``True`` once the sheet has been rewritten.

    Raises:
        HTTPException: 500 when the worksheet cannot be reached or updated.
    """
    columns = ['id', 'text', 'created_datetime', 'modified_datetime']
    try:
        worksheet = get_worksheet()

        # Passing `columns` explicitly yields the header row even when
        # there are no samples, so no separate empty-frame branch is needed.
        rows = [
            [sample.id, sample.text, sample.created_datetime, sample.modified_datetime]
            for sample in samples.values()
        ]
        df = pd.DataFrame(rows, columns=columns)

        # NOTE(review): clear + write is not atomic; a failure between the
        # two calls leaves the sheet empty.
        worksheet.clear()
        # pygsheets cell addresses are 1-based: (1, 1) is cell A1.
        worksheet.set_dataframe(df, (1, 1))

        logger.info(f"Successfully saved {len(samples)} samples to Google Sheets")
        return True
    except HTTPException:
        # Already a well-formed API error (e.g. from get_worksheet)
        raise
    except Exception as e:
        logger.error(f"Error saving samples to sheets: {e}", exc_info=True)
        raise HTTPException(
            status_code=500, detail=f"Failed to save data: {str(e)}"
        ) from e
163
 
164
 
165
+ # Get samples from Google Sheets
166
  def get_samples():
167
+ return load_samples_from_sheets()
168
 
169
 
170
  @app.post("/samples/", response_model=SampleResponse, status_code=status.HTTP_201_CREATED)
171
  async def add_sample(sample: Sample):
172
  """Add a new data sample"""
173
+ try:
174
+ # Load current samples
175
+ samples = get_samples()
176
+
177
+ # Generate ID based on text content
178
+ sample_id = generate_id_from_text(sample.text)
179
+
180
+ # Get current datetime
181
+ current_time = get_current_datetime()
182
+
183
+ # Create a sample response with ID, text, and timestamps
184
+ sample_response = SampleResponse(
185
+ id=sample_id,
186
+ text=sample.text,
187
+ created_datetime=current_time,
188
+ modified_datetime=current_time
189
+ )
190
+
191
+ # Store the sample
192
+ samples[sample_id] = sample_response
193
+
194
+ # Save to Google Sheets
195
+ save_samples_to_sheets(samples)
196
+
197
+ logger.info(f"Added new sample with ID: {sample_id}")
198
+ return sample_response
199
+ except Exception as e:
200
+ logger.error(f"Error adding sample: {e}")
201
+ raise HTTPException(status_code=500, detail=f"Failed to add sample: {str(e)}")
202
 
203
 
204
  @app.get("/samples/", response_model=List[SampleResponse])
205
  async def get_all_samples():
206
  """Get all data samples"""
207
+ try:
208
+ samples = get_samples()
209
+ result = list(samples.values())
210
+ logger.info(f"Returning {len(result)} samples")
211
+ return result
212
+ except Exception as e:
213
+ logger.error(f"Error retrieving samples: {e}")
214
+ raise HTTPException(status_code=500, detail=f"Failed to retrieve samples: {str(e)}")
215
 
216
 
217
  @app.get("/samples/{sample_id}", response_model=SampleResponse)
218
  async def get_sample(sample_id: str):
219
  """Get a specific data sample by ID"""
220
+ try:
221
+ samples = get_samples()
222
+ if sample_id not in samples:
223
+ logger.warning(f"Sample ID not found: {sample_id}")
224
+ raise HTTPException(status_code=404, detail="Sample not found")
225
+ return samples[sample_id]
226
+ except HTTPException:
227
+ raise
228
+ except Exception as e:
229
+ logger.error(f"Error retrieving sample {sample_id}: {e}")
230
+ raise HTTPException(status_code=500, detail=f"Failed to retrieve sample: {str(e)}")
231
 
232
 
233
  @app.put("/samples/{sample_id}", response_model=SampleResponse)
234
  async def update_sample(sample_id: str, updated_sample: Sample):
235
  """Update an existing data sample"""
236
+ try:
237
+ samples = get_samples()
238
+ if sample_id not in samples:
239
+ logger.warning(f"Attempted to update non-existent sample: {sample_id}")
240
+ raise HTTPException(status_code=404, detail="Sample not found")
241
+
242
+ # Get the existing sample to preserve created_datetime
243
+ existing_sample = samples[sample_id]
244
+
245
+ # Get current datetime for modified_datetime
246
+ current_time = get_current_datetime()
247
+
248
+ # Create updated sample with original ID and updated timestamps
249
+ updated_sample_response = SampleResponse(
250
+ id=sample_id,
251
+ text=updated_sample.text,
252
+ created_datetime=existing_sample.created_datetime,
253
+ modified_datetime=current_time
254
+ )
255
+
256
+ # Store the updated sample
257
+ samples[sample_id] = updated_sample_response
258
+
259
+ # Save to Google Sheets
260
+ save_samples_to_sheets(samples)
261
+
262
+ logger.info(f"Updated sample with ID: {sample_id}")
263
+ return updated_sample_response
264
+ except HTTPException:
265
+ raise
266
+ except Exception as e:
267
+ logger.error(f"Error updating sample {sample_id}: {e}")
268
+ raise HTTPException(status_code=500, detail=f"Failed to update sample: {str(e)}")
269
 
270
 
271
  @app.delete("/samples/{sample_id}", status_code=status.HTTP_204_NO_CONTENT)
272
  async def delete_sample(sample_id: str):
273
  """Delete a data sample"""
274
+ try:
275
+ samples = get_samples()
276
+ if sample_id not in samples:
277
+ logger.warning(f"Attempted to delete non-existent sample: {sample_id}")
278
+ raise HTTPException(status_code=404, detail="Sample not found")
279
+
280
+ # Remove the sample
281
+ del samples[sample_id]
282
+
283
+ # Save to Google Sheets
284
+ save_samples_to_sheets(samples)
285
+
286
+ logger.info(f"Deleted sample with ID: {sample_id}")
287
+ return None
288
+ except HTTPException:
289
+ raise
290
+ except Exception as e:
291
+ logger.error(f"Error deleting sample {sample_id}: {e}")
292
+ raise HTTPException(status_code=500, detail=f"Failed to delete sample: {str(e)}")
293
+
294
+
295
+ # Initialize the Google Sheet with headers if needed
296
def initialize_sheet():
    """Write the header row to the worksheet when it holds no records.

    This is best-effort: any failure is logged and swallowed so the
    application can still start when Google Sheets is unreachable.
    """
    try:
        worksheet = get_worksheet()
        # No records means the sheet is blank or holds only a header row;
        # either way, (re)writing the header is harmless.
        records = worksheet.get_all_records()
        if not records:
            logger.info("Initializing empty Google Sheet with headers")
            df = pd.DataFrame(columns=['id', 'text', 'created_datetime', 'modified_datetime'])
            # pygsheets cell addresses are 1-based: (1, 1) is cell A1.
            worksheet.set_dataframe(df, (1, 1))
            logger.info("Google Sheet initialized successfully")
    except Exception as e:
        # Deliberately not re-raised: startup must not depend on the sheet.
        logger.error(f"Error initializing sheet: {e}", exc_info=True)
311
+
312
+
313
+ # Initialize the sheet when the application starts
314
+ initialize_sheet()
315
 
316
  # Mount the static files directory at the end after defining all API routes
 
 
317
  app.mount("/", StaticFiles(directory="static", html=True), name="static")
requirements.txt CHANGED
@@ -1,3 +1,5 @@
1
  fastapi==0.104.0
2
  pydantic==2.11.3
3
- uvicorn==0.23.2
 
 
 
1
  fastapi==0.104.0
2
  pydantic==2.11.3
3
+ uvicorn==0.23.2
4
+ pandas==2.2.3
5
+ pygsheets==2.0.6