Spaces:
Runtime error
Runtime error
File size: 4,870 Bytes
e4f9cbe dbed4d4 fb71af1 b708786 e4f9cbe 544327d e4f9cbe 55dc3dd e4f9cbe cc5eabb 0dc939d e4f9cbe 54369d2 e4f9cbe 544327d e4f9cbe 544327d e4f9cbe 8796ec1 b708786 86be2dc 6a8124e b4ce410 81f7253 2c44166 81f7253 3a11016 81f7253 cc5eabb 81f7253 cc5eabb 81f7253 8e61415 81f7253 cc5eabb fb71af1 7b46386 e4f9cbe |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
"""Serves the Lilac server."""
import logging
import os
import shutil
import subprocess
from typing import Any
from fastapi import APIRouter, FastAPI
from fastapi.responses import FileResponse, ORJSONResponse
from fastapi.routing import APIRoute
from fastapi.staticfiles import StaticFiles
from huggingface_hub import snapshot_download
from . import router_concept, router_data_loader, router_dataset, router_signal, router_tasks
from .concepts.db_concept import DiskConceptDB, get_concept_output_dir
from .config import CONFIG, data_path
from .router_utils import RouteErrorHandler
from .tasks import task_manager
from .utils import get_dataset_output_dir, list_datasets
# Absolute path of the built web client ('web/blueprint/build'), resolved against the current
# working directory at import time.
DIST_PATH = os.path.abspath(os.path.join('web', 'blueprint', 'build'))

# Tag descriptions handed to FastAPI as `openapi_tags`; one entry per (tag name, description).
tags_metadata: list[dict[str, Any]] = [
    {'name': tag_name, 'description': tag_description}
    for tag_name, tag_description in (
        ('datasets', 'API for querying a dataset.'),
        ('concepts', 'API for managing concepts.'),
        ('data_loaders', 'API for loading data.'),
        ('signals', 'API for managing signals.'),
    )
]
def custom_generate_unique_id(route: APIRoute) -> str:
    """Return the route's own name, to be used as the unique id of its API endpoint."""
    endpoint_id = route.name
    return endpoint_id
# The FastAPI application. Responses default to ORJSONResponse, endpoint ids are produced by
# custom_generate_unique_id, and tags_metadata supplies the OpenAPI tag descriptions.
app = FastAPI(
default_response_class=ORJSONResponse,
generate_unique_id_function=custom_generate_unique_id,
openapi_tags=tags_metadata)
# All v1 API routes are created with RouteErrorHandler as their route class.
v1_router = APIRouter(route_class=RouteErrorHandler)
# Each feature router is attached under its own URL prefix and OpenAPI tag.
v1_router.include_router(router_dataset.router, prefix='/datasets', tags=['datasets'])
v1_router.include_router(router_concept.router, prefix='/concepts', tags=['concepts'])
v1_router.include_router(router_data_loader.router, prefix='/data_loaders', tags=['data_loaders'])
v1_router.include_router(router_signal.router, prefix='/signals', tags=['signals'])
# NOTE(review): unlike the four tags above, 'tasks' has no entry in tags_metadata.
v1_router.include_router(router_tasks.router, prefix='/tasks', tags=['tasks'])
# Expose the combined v1 router under the /api/v1 prefix.
app.include_router(v1_router, prefix='/api/v1')
@app.api_route('/{path_name}', include_in_schema=False)
def catch_all() -> FileResponse:
    """Answer requests matching '/{path_name}' with the web client's index.html.

    This lets the client handle HTML5 history URLs itself. The route is excluded from the
    OpenAPI schema.
    """
    index_html = os.path.join(DIST_PATH, 'index.html')
    return FileResponse(path=index_html)
# Serve static files in production mode: the build output in DIST_PATH is mounted at the root
# path, after the API router and the catch-all route above have been registered.
# NOTE(review): check_dir=False presumably lets the app start when the build directory does not
# exist (e.g. before the web client has been built) -- confirm against the Starlette docs.
app.mount('/', StaticFiles(directory=DIST_PATH, html=True, check_dir=False))
@app.on_event('startup')
def startup() -> None:
    """Download dataset files from the HF space that was uploaded before building the image.

    When the `SPACE_ID` config value is set, the whole space is downloaded into
    `<data_path>/.hf_spaces/<SPACE_ID>`, and every dataset and concept found under its `data`
    directory is moved into the persistent data directory, replacing any existing copy.
    When `SPACE_ID` is not set, nothing is downloaded.
    """
    # NOTE(review): CONFIG is also the source of HF_ACCESS_TOKEN (see below), so this print may
    # write the access token to the server logs -- consider redacting it.
    print('env=', CONFIG)
    # SPACE_ID is the HuggingFace Space ID environment variable that is automatically set by HF.
    repo_id = CONFIG.get('SPACE_ID', None)
    if not repo_id:
        return

    # Download the huggingface space data. This includes code and datasets, so we move the
    # datasets alone to the data directory.
    spaces_download_dir = os.path.join(data_path(), '.hf_spaces', repo_id)
    snapshot_download(
        repo_id=repo_id,
        repo_type='space',
        local_dir=spaces_download_dir,
        local_dir_use_symlinks=False,
        cache_dir=os.path.join(data_path(), '.hf_cache'),
        # `.get` instead of indexing: a missing token must not crash startup with a KeyError.
        token=CONFIG.get('HF_ACCESS_TOKEN', None))

    spaces_data_dir = os.path.join(spaces_download_dir, 'data')

    # Copy datasets.
    for dataset in list_datasets(spaces_data_dir):
        spaces_dataset_output_dir = get_dataset_output_dir(
            spaces_data_dir, dataset.namespace, dataset.dataset_name)
        persistent_output_dir = get_dataset_output_dir(
            data_path(), dataset.namespace, dataset.dataset_name)
        # Huggingface doesn't let you selectively download files so we just copy the data
        # directory out of the cloned space.
        shutil.rmtree(persistent_output_dir, ignore_errors=True)
        shutil.move(spaces_dataset_output_dir, persistent_output_dir)

    # Copy concepts.
    for concept in DiskConceptDB(spaces_data_dir).list():
        spaces_concept_output_dir = get_concept_output_dir(
            spaces_data_dir, concept.namespace, concept.name)
        # The destination must be computed with the concept helper as well; using the dataset
        # helper here would move concepts into the dataset directory layout.
        persistent_output_dir = get_concept_output_dir(
            data_path(), concept.namespace, concept.name)
        shutil.rmtree(persistent_output_dir, ignore_errors=True)
        shutil.move(spaces_concept_output_dir, persistent_output_dir)
def run(cmd: str) -> subprocess.CompletedProcess[bytes]:
    """Execute `cmd` through the shell and return the completed process.

    Because `check=True` is passed, a non-zero exit status raises
    `subprocess.CalledProcessError`. The string is interpreted by the shell, so it must not be
    built from untrusted input.
    """
    completed = subprocess.run(cmd, check=True, shell=True)
    return completed
@app.on_event('shutdown')
async def shutdown_event() -> None:
    """Stop the task manager when FastAPI shuts down."""
    manager = task_manager()
    await manager.stop()
class GetTasksFilter(logging.Filter):
    """Logging filter that drops records mentioning the tasks endpoint."""

    def filter(self, record: logging.LogRecord) -> bool:
        """Return False for records whose formatted message contains '/api/v1/tasks/'."""
        message = record.getMessage()
        return '/api/v1/tasks/' not in message
# Attach the filter to the 'uvicorn.access' logger so that records mentioning '/api/v1/tasks/'
# are left out of that log.
logging.getLogger('uvicorn.access').addFilter(GetTasksFilter())
|