# EcoMindAI2 — app.py
# (HuggingFace Space; commit 9570084: "managed to implement working .csv handling")
import gradio as gr
import pandas as pd
from services.huggingface import init_huggingface, update_dataset
from services.json_generator import generate_json
from ui.form_components import (
create_header_tab,
create_task_tab,
create_measures_tab,
create_system_tab,
create_software_tab,
create_infrastructure_tab,
create_environment_tab,
create_quality_tab,
create_hash_tab
)
# Set up the Hugging Face connection before building the UI.
init_huggingface()

# Build the Gradio interface.
with gr.Blocks() as demo:
    gr.Markdown("## ML-related Data Collection Form")
    gr.Markdown("Welcome to this Huggingface space that helps you fill in a form for monitoring the energy consumption of an AI model.")

    # CSV upload widget; its change event pre-fills the form below.
    csv_upload = gr.File(label="Upload CSV", file_types=[".csv"])
    gr.Label("Please upload a CSV file with the data you want to analyze.")

    # One factory call per form section / tab.
    header_components = create_header_tab()
    task_components = create_task_tab()
    measures_components = create_measures_tab()
    system_components = create_system_tab()
    software_components = create_software_tab()
    infrastructure_components = create_infrastructure_tab()
    environment_components = create_environment_tab()
    quality_components = create_quality_tab()
    hash_components = create_hash_tab()

    # Flat list of every form widget, in the exact order the CSV parser
    # indexes them.
    all_form_components = [
        *header_components,          # 11 items (indices 0-10)
        *task_components,            # 28 items (indices 11-38)
        *measures_components,        # 14 items (indices 39-52)
        *system_components,          # 3 items (indices 53-55)
        *software_components,        # 2 items (indices 56-57)
        *infrastructure_components,  # 10 items (indices 58-67)
        *environment_components,     # 7 items (indices 68-74)
        *quality_components,         # 1 item (index 75)
        *hash_components,            # 3 items (indices 76-78)
    ]
def parse_csv_and_update_form(csv_file, *current_values):
    """Pre-fill the form from an uploaded (codecarbon-style) CSV.

    Args:
        csv_file: the gr.File payload (an object exposing a ``.name`` path),
            or None when the upload is cleared.
        *current_values: the current value of every widget in
            ``all_form_components``, in order.

    Returns:
        A list of the same length with the CSV-derived fields replaced.
        Behaviour is best-effort: on any parsing error the values written so
        far are returned and the error is printed, so the form stays usable.
    """
    updated_values = list(current_values)
    if csv_file is None:
        return updated_values
    try:
        df = pd.read_csv(csv_file.name)
        # Only the first data row of the CSV is used.
        csv_data = df.iloc[0].to_dict()

        # NOTE(review): from the MEASURES section onward, the list index
        # written is one greater than the component index named in the
        # trailing comment (e.g. "os (index 53)" is written at [54]); the
        # HEADER section is the exception.  TODO: confirm this offset
        # against the actual component order in ui.form_components.

        # ========== HEADER ==========
        updated_values[3] = csv_data.get('run_id', '')        # reportId (index 3)
        updated_values[4] = csv_data.get('timestamp', '')     # reportDatetime (4)
        updated_values[8] = csv_data.get('project_name', '')  # publisher_projectName (8)

        # ========== SYSTEM ==========
        updated_values[54] = csv_data.get('os', '')  # os (index 53)
        updated_values[55] = ""                      # distribution (54)
        updated_values[56] = ""                      # distributionVersion (55)

        # ========== MEASURES ==========
        updated_values[40] = csv_data.get('tracking_mode', '')    # measurementMethod (39)
        updated_values[48] = "kWh"                                # unit (47)
        updated_values[51] = csv_data.get('energy_consumed', '')  # powerConsumption (50)
        # Duration conversion (hours -> seconds).
        if 'duration' in csv_data:
            try:
                hours = float(csv_data['duration'])
                updated_values[52] = str(round(hours * 3600, 2))  # measurementDuration (51)
            except (TypeError, ValueError):
                # Non-numeric duration: leave the field blank.
                updated_values[52] = ""
        updated_values[53] = csv_data.get('timestamp', '')  # measurementDateTime (52)

        # ========== SOFTWARE ==========
        updated_values[57] = "Python"                            # language (56)
        updated_values[58] = csv_data.get('python_version', '')  # version_software (57)

        # ========== INFRASTRUCTURE ==========
        on_cloud = str(csv_data.get('on_cloud', 'No')).lower().strip()
        updated_values[59] = "publicCloud" if on_cloud == "yes" else "onPremise"  # infraType (58)
        # BUG FIX: the cloud fields used to be written at [59]/[60], which
        # immediately clobbered the infraType value set just above.  They
        # now sit at [60]/[61], consistent with the +1 offset used in the
        # rest of this function.
        updated_values[60] = csv_data.get('cloud_provider', '') if on_cloud == "yes" else ""
        updated_values[61] = csv_data.get('cloud_region', '') if on_cloud == "yes" else ""

        # Component logic: prefer GPU info, fall back to CPU.
        gpu_count = int(csv_data.get('gpu_count', 0))
        cpu_count = int(csv_data.get('cpu_count', 0))
        if gpu_count > 0:
            updated_values[62] = str(gpu_count)  # nbComponent (62)
            model = csv_data.get('gpu_model', '')
        elif cpu_count > 0:
            # BUG FIX: removed a stray "u" statement that raised NameError
            # whenever this branch ran (silently aborting the rest of the
            # mapping via the outer except).
            updated_values[62] = str(cpu_count)  # nbComponent (62)
            model = csv_data.get('cpu_model', '')
        else:
            model = ""

        # Memory size (63).  Guarded locally so a non-numeric value does
        # not abort every field below it.
        ram_size = csv_data.get('ram_total_size', '')
        try:
            updated_values[63] = f"{ram_size} GB" if ram_size and float(ram_size) > 0 else ""
        except (TypeError, ValueError):
            updated_values[63] = ""

        # Split model into manufacturer/family/series (64-66).
        if model:
            parts = model.replace("(R)", "").replace("(TM)", "").split()
            updated_values[65] = parts[0] if parts else ""                        # manufacturer_infra (64)
            updated_values[66] = " ".join(parts[1:3]) if len(parts) >= 3 else ""  # family (65)
            updated_values[67] = " ".join(parts[3:]) if len(parts) > 3 else ""    # series (66)
        else:
            updated_values[65] = updated_values[66] = updated_values[67] = ""
        # BUG FIX: share used to be written at [67], clobbering the series
        # value computed just above; moved to [68] per the +1 offset.
        updated_values[68] = ""  # share (67)

        # ========== ENVIRONMENT ==========
        updated_values[69] = csv_data.get('country_name', '')  # country (68)
        updated_values[70] = csv_data.get('latitude', '')      # latitude (69)
        updated_values[71] = csv_data.get('longitude', '')     # longitude (70)
        updated_values[72] = csv_data.get('region', '')        # location (71)
    except Exception as e:
        # Best-effort: report the problem but keep the form usable.
        print(f"CSV Processing Error: {str(e)}")
    return updated_values
# Parse CSV and update form values
csv_upload.change(
fn=parse_csv_and_update_form,
inputs=[csv_upload] + all_form_components,
outputs=all_form_components
)
# Submit and Download Buttons
submit_button = gr.Button("Submit")
output = gr.Textbox(label="Output", lines=1)
json_output = gr.Textbox(visible=False)
file_output = gr.File(label="Downloadable JSON")
# Event Handlers
submit_button.click(
generate_json,
inputs=[
*header_components,
*task_components,
*measures_components,
*system_components,
*software_components,
*infrastructure_components,
*environment_components,
*quality_components,
*hash_components
],
outputs=[output, file_output, json_output]
).then(
update_dataset,
inputs=json_output,
outputs=output
)
print(all_form_components)
print(len(all_form_components))
if __name__ == "__main__":
demo.launch()
print(all_form_components)