TheoLvs committed
Commit 4d6e8c2 · 1 Parent(s): 6fb5d57

Updated API submission

.gitignore ADDED
@@ -0,0 +1,17 @@
+.ipynb_checkpoints/sandbox-checkpoint.ipynb
+
+auto_evals/
+venv/
+__pycache__/
+.env
+.ipynb_checkpoints
+*ipynb
+.vscode/
+
+eval-queue/
+eval-results/
+eval-queue-bk/
+eval-results-bk/
+logs/
+
+emissions.csv
Dockerfile ADDED
@@ -0,0 +1,16 @@
+# Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
+# You will also find guides on how best to write your Dockerfile
+
+FROM python:3.9
+
+RUN useradd -m -u 1000 user
+USER user
+ENV PATH="/home/user/.local/bin:$PATH"
+
+WORKDIR /app
+
+COPY --chown=user ./requirements.txt requirements.txt
+RUN pip install --no-cache-dir --upgrade -r requirements.txt
+
+COPY --chown=user . /app
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
app.py ADDED
@@ -0,0 +1,27 @@
+from fastapi import FastAPI
+from dotenv import load_dotenv
+from tasks import text, image, audio
+
+# Load environment variables
+load_dotenv()
+
+app = FastAPI(
+    title="Frugal AI Challenge API",
+    description="API for the Frugal AI Challenge evaluation endpoints"
+)
+
+# Include all routers
+app.include_router(text.router)
+app.include_router(image.router)
+app.include_router(audio.router)
+
+@app.get("/")
+async def root():
+    return {
+        "message": "Welcome to the Frugal AI Challenge API",
+        "endpoints": {
+            "text": "/text - Text classification task",
+            "image": "/image - Image classification task (coming soon)",
+            "audio": "/audio - Audio classification task (coming soon)"
+        }
+    }
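
The router wiring can be checked without starting a server, e.g. with FastAPI's TestClient. A sketch, assuming httpx is installed (or requests on older FastAPI versions; neither is in requirements.txt):

from fastapi.testclient import TestClient
from app import app

client = TestClient(app)

resp = client.get("/")
assert resp.status_code == 200
# All three routers are mounted, so their paths appear in the OpenAPI schema
assert {"/text", "/image", "/audio"} <= set(app.openapi()["paths"])
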
requirements.txt ADDED
@@ -0,0 +1,7 @@
+fastapi>=0.68.0
+uvicorn>=0.15.0
+codecarbon>=2.3.1
+datasets>=2.14.0
+scikit-learn>=1.0.2
+pydantic>=1.10.0
+python-dotenv>=1.0.0
tasks/__init__.py ADDED
File without changes
tasks/audio.py ADDED
@@ -0,0 +1,23 @@
+from fastapi import APIRouter
+from .utils.evaluation import AudioEvaluationRequest
+from .utils.emissions import get_space_info
+
+router = APIRouter()
+
+@router.post("/audio", tags=["Audio Task"])
+async def evaluate_audio(request: AudioEvaluationRequest):
+    """
+    Placeholder for audio task evaluation.
+    """
+    username, space_url = get_space_info()
+    return {
+        "message": "Audio evaluation endpoint not yet implemented",
+        "username": username,
+        "space_url": space_url,
+        "received_config": {
+            "dataset_name": request.dataset_name,
+            "test_size": request.test_size,
+            "test_seed": request.test_seed,
+            "model_description": request.model_description
+        }
+    }
tasks/image.py ADDED
@@ -0,0 +1,23 @@
+from fastapi import APIRouter
+from .utils.evaluation import ImageEvaluationRequest
+from .utils.emissions import get_space_info
+
+router = APIRouter()
+
+@router.post("/image", tags=["Image Task"])
+async def evaluate_image(request: ImageEvaluationRequest):
+    """
+    Placeholder for image task evaluation.
+    """
+    username, space_url = get_space_info()
+    return {
+        "message": "Image evaluation endpoint not yet implemented",
+        "username": username,
+        "space_url": space_url,
+        "received_config": {
+            "dataset_name": request.dataset_name,
+            "test_size": request.test_size,
+            "test_seed": request.test_seed,
+            "model_description": request.model_description
+        }
+    }
tasks/text.py ADDED
@@ -0,0 +1,73 @@
+from fastapi import APIRouter
+from datetime import datetime
+from datasets import load_dataset
+from sklearn.metrics import accuracy_score
+import random
+
+from .utils.evaluation import TextEvaluationRequest
+from .utils.emissions import tracker, clean_emissions_data, get_space_info
+
+router = APIRouter()
+
+@router.post("/text", tags=["Text Task"])
+async def evaluate_text(request: TextEvaluationRequest):
+    """
+    Evaluate a text classification model for climate disinformation detection.
+    """
+    # Get space info
+    username, space_url = get_space_info()
+
+    # Define the label mapping
+    LABEL_MAPPING = {
+        "0_not_relevant": 0,
+        "1_not_happening": 1,
+        "2_not_human": 2,
+        "3_not_bad": 3,
+        "4_solutions_harmful_unnecessary": 4,
+        "5_science_unreliable": 5,
+        "6_proponents_biased": 6,
+        "7_fossil_fuels_needed": 7
+    }
+
+    # Load and prepare the dataset
+    dataset = load_dataset(request.dataset_name)
+
+    # Convert string labels to integers
+    dataset = dataset.map(lambda x: {"label": LABEL_MAPPING[x["label"]]})
+
+    # Split dataset
+    train_test = dataset["train"].train_test_split(test_size=request.test_size, seed=request.test_seed)
+    test_dataset = train_test["test"]
+
+    # Start tracking emissions
+    tracker.start()
+    tracker.start_task("inference")
+
+    # Make random predictions (placeholder for actual model inference)
+    true_labels = test_dataset["label"]
+    predictions = [random.randint(0, 7) for _ in range(len(true_labels))]
+
+    # Stop tracking emissions
+    emissions_data = tracker.stop_task()
+
+    # Calculate accuracy
+    accuracy = accuracy_score(true_labels, predictions)
+
+    # Prepare results dictionary
+    results = {
+        "username": username,
+        "space_url": space_url,
+        "submission_timestamp": datetime.now().isoformat(),
+        "model_description": request.model_description,
+        "accuracy": float(accuracy),
+        "energy_consumed_wh": emissions_data.energy_consumed * 1000,  # codecarbon reports kWh
+        "emissions_gco2eq": emissions_data.emissions * 1000,  # codecarbon reports kg CO2eq
+        "emissions_data": clean_emissions_data(emissions_data),
+        "dataset_config": {
+            "dataset_name": request.dataset_name,
+            "test_size": request.test_size,
+            "test_seed": request.test_seed
+        }
+    }
+
+    return results
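
A hedged example of exercising this endpoint end to end. The base URL is an assumption for a local `uvicorn` run on port 7860; the payload fields mirror TextEvaluationRequest and are all optional thanks to its defaults:

# Client-side submission sketch (requests is assumed to be installed).
import requests

payload = {
    "dataset_name": "QuotaClimat/frugalaichallenge-text-train",
    "test_size": 0.2,
    "test_seed": 42,
    "model_description": "random baseline",
}
result = requests.post("http://localhost:7860/text", json=payload).json()
print(result["accuracy"], result["energy_consumed_wh"])
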
tasks/utils/__init__.py ADDED
File without changes
tasks/utils/emissions.py ADDED
@@ -0,0 +1,28 @@
+from codecarbon import EmissionsTracker
+import os
+
+# Initialize tracker
+tracker = EmissionsTracker(allow_multiple_runs=True)
+
+class EmissionsData:
+    def __init__(self, energy_consumed: float, emissions: float):
+        self.energy_consumed = energy_consumed
+        self.emissions = emissions
+
+def clean_emissions_data(emissions_data):
+    """Remove unwanted fields from emissions data"""
+    data_dict = emissions_data.__dict__
+    fields_to_remove = ['timestamp', 'project_name', 'experiment_id', 'latitude', 'longitude']
+    return {k: v for k, v in data_dict.items() if k not in fields_to_remove}
+
+def get_space_info():
+    """Get the space username and URL from environment variables"""
+    space_name = os.getenv("SPACE_ID", "")
+    if space_name:
+        try:
+            username = space_name.split("/")[0]
+            space_url = f"https://huggingface.co/spaces/{space_name}"
+            return username, space_url
+        except Exception as e:
+            print(f"Error getting space info: {e}")
+    return "local-user", "local-development"
tasks/utils/evaluation.py ADDED
@@ -0,0 +1,19 @@
+from typing import Optional
+from pydantic import BaseModel, Field
+
+class BaseEvaluationRequest(BaseModel):
+    test_size: float = Field(0.2, ge=0.0, le=1.0, description="Size of the test split (between 0 and 1)")
+    test_seed: int = Field(42, ge=0, description="Random seed for reproducibility")
+    model_description: Optional[str] = Field("No description provided", description="Description of the model being evaluated")
+
+class TextEvaluationRequest(BaseEvaluationRequest):
+    dataset_name: str = Field("QuotaClimat/frugalaichallenge-text-train",
+                              description="The name of the dataset on HuggingFace Hub")
+
+class ImageEvaluationRequest(BaseEvaluationRequest):
+    dataset_name: str = Field("placeholder/frugalaichallenge-image-train",
+                              description="The name of the dataset on HuggingFace Hub")
+
+class AudioEvaluationRequest(BaseEvaluationRequest):
+    dataset_name: str = Field("placeholder/frugalaichallenge-audio-train",
+                              description="The name of the dataset on HuggingFace Hub")