TheoLvs committed on
Commit
70f5f26
·
1 Parent(s): 4d6e8c2

Updated API

Browse files
Files changed (6) hide show
  1. README.md +62 -1
  2. requirements.txt +3 -1
  3. tasks/audio.py +11 -4
  4. tasks/image.py +11 -4
  5. tasks/text.py +20 -3
  6. tasks/utils/evaluation.py +0 -1
README.md CHANGED
@@ -7,4 +7,65 @@ sdk: docker
7
  pinned: false
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  pinned: false
8
  ---
9
 
10
+
11
+ # Random Baseline Model for Climate Disinformation Classification
12
+
13
+ ## Model Description
14
+
15
+ This is a random baseline model for the Frugal AI Challenge 2024, specifically for the text classification task of identifying climate disinformation. The model serves as a performance floor, randomly assigning labels to text inputs without any learning.
16
+
17
+ ### Intended Use
18
+
19
+ - **Primary intended uses**: Baseline comparison for climate disinformation classification models
20
+ - **Primary intended users**: Researchers and developers participating in the Frugal AI Challenge
21
+ - **Out-of-scope use cases**: Not intended for production use or real-world classification tasks
22
+
23
+ ## Training Data
24
+
25
+ The model uses the QuotaClimat/frugalaichallenge-text-train dataset:
26
+ - Size: ~6000 examples
27
+ - Split: 80% train, 20% test
28
+ - 8 categories of climate disinformation claims
29
+
30
+ ### Labels
31
+ 0. No relevant claim detected
32
+ 1. Global warming is not happening
33
+ 2. Not caused by humans
34
+ 3. Not bad or beneficial
35
+ 4. Solutions harmful/unnecessary
36
+ 5. Science is unreliable
37
+ 6. Proponents are biased
38
+ 7. Fossil fuels are needed
39
+
40
+ ## Performance
41
+
42
+ ### Metrics
43
+ - **Accuracy**: ~12.5% (random chance with 8 classes)
44
+ - **Environmental Impact**:
45
+ - Emissions tracked in gCO2eq
46
+ - Energy consumption tracked in Wh
47
+
48
+ ### Model Architecture
49
+ The model implements a random choice between the 8 possible labels, serving as the simplest possible baseline.
50
+
51
+ ## Environmental Impact
52
+
53
+ Environmental impact is tracked using CodeCarbon, measuring:
54
+ - Carbon emissions during inference
55
+ - Energy consumption during inference
56
+
57
+ This tracking helps establish a baseline for the environmental impact of model deployment and inference.
58
+
59
+ ## Limitations
60
+ - Makes completely random predictions
61
+ - No learning or pattern recognition
62
+ - No consideration of input text
63
+ - Serves only as a baseline reference
64
+ - Not suitable for any real-world applications
65
+
66
+ ## Ethical Considerations
67
+
68
+ - Dataset contains sensitive topics related to climate disinformation
69
+ - Model makes random predictions and should not be used for actual classification
70
+ - Environmental impact is tracked to promote awareness of AI's carbon footprint
71
+ ```
requirements.txt CHANGED
@@ -4,4 +4,6 @@ codecarbon>=2.3.1
4
  datasets>=2.14.0
5
  scikit-learn>=1.0.2
6
  pydantic>=1.10.0
7
- python-dotenv>=1.0.0
 
 
 
4
  datasets>=2.14.0
5
  scikit-learn>=1.0.2
6
  pydantic>=1.10.0
7
+ python-dotenv>=1.0.0
8
+ gradio>=4.0.0
9
+ requests>=2.31.0
tasks/audio.py CHANGED
@@ -4,20 +4,27 @@ from .utils.emissions import get_space_info
4
 
5
  router = APIRouter()
6
 
7
- @router.post("/audio", tags=["Audio Task"])
 
 
 
8
  async def evaluate_audio(request: AudioEvaluationRequest):
9
  """
10
- Placeholder for audio task evaluation.
 
 
 
 
11
  """
12
  username, space_url = get_space_info()
13
  return {
14
  "message": "Audio evaluation endpoint not yet implemented",
15
  "username": username,
16
  "space_url": space_url,
 
17
  "received_config": {
18
  "dataset_name": request.dataset_name,
19
  "test_size": request.test_size,
20
- "test_seed": request.test_seed,
21
- "model_description": request.model_description
22
  }
23
  }
 
4
 
5
  router = APIRouter()
6
 
7
+ DESCRIPTION = "Random Baseline"
8
+
9
+ @router.post("/audio", tags=["Audio Task"],
10
+ description=DESCRIPTION)
11
  async def evaluate_audio(request: AudioEvaluationRequest):
12
  """
13
+ Evaluate audio classification.
14
+
15
+ Current Model: Random Baseline
16
+ - Makes random predictions
17
+ - Used as a baseline for comparison
18
  """
19
  username, space_url = get_space_info()
20
  return {
21
  "message": "Audio evaluation endpoint not yet implemented",
22
  "username": username,
23
  "space_url": space_url,
24
+ "model_description": DESCRIPTION,
25
  "received_config": {
26
  "dataset_name": request.dataset_name,
27
  "test_size": request.test_size,
28
+ "test_seed": request.test_seed
 
29
  }
30
  }
tasks/image.py CHANGED
@@ -4,20 +4,27 @@ from .utils.emissions import get_space_info
4
 
5
  router = APIRouter()
6
 
7
- @router.post("/image", tags=["Image Task"])
 
 
 
8
  async def evaluate_image(request: ImageEvaluationRequest):
9
  """
10
- Placeholder for image task evaluation.
 
 
 
 
11
  """
12
  username, space_url = get_space_info()
13
  return {
14
  "message": "Image evaluation endpoint not yet implemented",
15
  "username": username,
16
  "space_url": space_url,
 
17
  "received_config": {
18
  "dataset_name": request.dataset_name,
19
  "test_size": request.test_size,
20
- "test_seed": request.test_seed,
21
- "model_description": request.model_description
22
  }
23
  }
 
4
 
5
  router = APIRouter()
6
 
7
+ DESCRIPTION = "Random Baseline"
8
+
9
+ @router.post("/image", tags=["Image Task"],
10
+ description=DESCRIPTION)
11
  async def evaluate_image(request: ImageEvaluationRequest):
12
  """
13
+ Evaluate image classification.
14
+
15
+ Current Model: Random Baseline
16
+ - Makes random predictions
17
+ - Used as a baseline for comparison
18
  """
19
  username, space_url = get_space_info()
20
  return {
21
  "message": "Image evaluation endpoint not yet implemented",
22
  "username": username,
23
  "space_url": space_url,
24
+ "model_description": DESCRIPTION,
25
  "received_config": {
26
  "dataset_name": request.dataset_name,
27
  "test_size": request.test_size,
28
+ "test_seed": request.test_seed
 
29
  }
30
  }
tasks/text.py CHANGED
@@ -9,10 +9,17 @@ from .utils.emissions import tracker, clean_emissions_data, get_space_info
9
 
10
  router = APIRouter()
11
 
12
- @router.post("/text", tags=["Text Task"])
 
 
 
13
  async def evaluate_text(request: TextEvaluationRequest):
14
  """
15
- Evaluate a text classification model for climate disinformation detection.
 
 
 
 
16
  """
17
  # Get space info
18
  username, space_url = get_space_info()
@@ -42,10 +49,20 @@ async def evaluate_text(request: TextEvaluationRequest):
42
  # Start tracking emissions
43
  tracker.start()
44
  tracker.start_task("inference")
 
 
 
 
 
45
 
46
  # Make random predictions (placeholder for actual model inference)
47
  true_labels = test_dataset["label"]
48
  predictions = [random.randint(0, 7) for _ in range(len(true_labels))]
 
 
 
 
 
49
 
50
  # Stop tracking emissions
51
  emissions_data = tracker.stop_task()
@@ -58,7 +75,7 @@ async def evaluate_text(request: TextEvaluationRequest):
58
  "username": username,
59
  "space_url": space_url,
60
  "submission_timestamp": datetime.now().isoformat(),
61
- "model_description": request.model_description,
62
  "accuracy": float(accuracy),
63
  "energy_consumed_wh": emissions_data.energy_consumed * 1000,
64
  "emissions_gco2eq": emissions_data.emissions * 1000,
 
9
 
10
  router = APIRouter()
11
 
12
+ DESCRIPTION = "Random Baseline"
13
+
14
+ @router.post("/text", tags=["Text Task"],
15
+ description=DESCRIPTION)
16
  async def evaluate_text(request: TextEvaluationRequest):
17
  """
18
+ Evaluate text classification for climate disinformation detection.
19
+
20
+ Current Model: Random Baseline
21
+ - Makes random predictions from the label space (0-7)
22
+ - Used as a baseline for comparison
23
  """
24
  # Get space info
25
  username, space_url = get_space_info()
 
49
  # Start tracking emissions
50
  tracker.start()
51
  tracker.start_task("inference")
52
+
53
+ #--------------------------------------------------------------------------------------------
54
+ # YOUR MODEL INFERENCE CODE HERE
55
+ # Update the code below to replace the random baseline by your model inference within the inference pass where the energy consumption and emissions are tracked.
56
+ #--------------------------------------------------------------------------------------------
57
 
58
  # Make random predictions (placeholder for actual model inference)
59
  true_labels = test_dataset["label"]
60
  predictions = [random.randint(0, 7) for _ in range(len(true_labels))]
61
+
62
+ #--------------------------------------------------------------------------------------------
63
+ # YOUR MODEL INFERENCE STOPS HERE
64
+ #--------------------------------------------------------------------------------------------
65
+
66
 
67
  # Stop tracking emissions
68
  emissions_data = tracker.stop_task()
 
75
  "username": username,
76
  "space_url": space_url,
77
  "submission_timestamp": datetime.now().isoformat(),
78
+ "model_description": DESCRIPTION,
79
  "accuracy": float(accuracy),
80
  "energy_consumed_wh": emissions_data.energy_consumed * 1000,
81
  "emissions_gco2eq": emissions_data.emissions * 1000,
tasks/utils/evaluation.py CHANGED
@@ -4,7 +4,6 @@ from pydantic import BaseModel, Field
4
  class BaseEvaluationRequest(BaseModel):
5
  test_size: float = Field(0.2, ge=0.0, le=1.0, description="Size of the test split (between 0 and 1)")
6
  test_seed: int = Field(42, ge=0, description="Random seed for reproducibility")
7
- model_description: Optional[str] = Field("No description provided", description="Description of the model being evaluated")
8
 
9
  class TextEvaluationRequest(BaseEvaluationRequest):
10
  dataset_name: str = Field("QuotaClimat/frugalaichallenge-text-train",
 
4
  class BaseEvaluationRequest(BaseModel):
5
  test_size: float = Field(0.2, ge=0.0, le=1.0, description="Size of the test split (between 0 and 1)")
6
  test_seed: int = Field(42, ge=0, description="Random seed for reproducibility")
 
7
 
8
  class TextEvaluationRequest(BaseEvaluationRequest):
9
  dataset_name: str = Field("QuotaClimat/frugalaichallenge-text-train",