# EcoMindAI2 — app.py
# (HuggingFace Space; commit 9570084: "managed to implement working .csv handling")
import gradio as gr
import pandas as pd
from services.huggingface import init_huggingface, update_dataset
from services.json_generator import generate_json
from ui.form_components import (
create_header_tab,
create_task_tab,
create_measures_tab,
create_system_tab,
create_software_tab,
create_infrastructure_tab,
create_environment_tab,
create_quality_tab,
create_hash_tab
)
# Set up the Hugging Face connection before building the UI.
init_huggingface()

# Build the Gradio interface.
with gr.Blocks() as demo:
    gr.Markdown("## ML-related Data Collection Form")
    gr.Markdown("Welcome to this Huggingface space that helps you fill in a form for monitoring the energy consumption of an AI model.")

    # CSV upload widget; its change event pre-fills the form below.
    csv_upload = gr.File(label="Upload CSV", file_types=[".csv"])
    gr.Label("Please upload a CSV file with the data you want to analyze.")

    # One factory call per form section / tab.
    header_components = create_header_tab()
    task_components = create_task_tab()
    measures_components = create_measures_tab()
    system_components = create_system_tab()
    software_components = create_software_tab()
    infrastructure_components = create_infrastructure_tab()
    environment_components = create_environment_tab()
    quality_components = create_quality_tab()
    hash_components = create_hash_tab()

    # Flat list of every form widget, in the exact order the CSV parser
    # indexes them.
    all_form_components = [
        *header_components,          # 11 items (indices 0-10)
        *task_components,            # 28 items (indices 11-38)
        *measures_components,        # 14 items (indices 39-52)
        *system_components,          # 3 items (indices 53-55)
        *software_components,        # 2 items (indices 56-57)
        *infrastructure_components,  # 10 items (indices 58-67)
        *environment_components,     # 7 items (indices 68-74)
        *quality_components,         # 1 item (index 75)
        *hash_components,            # 3 items (indices 76-78)
    ]
def parse_csv_and_update_form(csv_file, *current_values):
    """Pre-fill the form from an uploaded (codecarbon-style) CSV.

    Args:
        csv_file: the gr.File payload (an object exposing a ``.name`` path),
            or None when the upload is cleared.
        *current_values: the current value of every widget in
            ``all_form_components``, in order.

    Returns:
        A list of the same length with the CSV-derived fields replaced.
        Behaviour is best-effort: on any parsing error the values written so
        far are returned and the error is printed, so the form stays usable.
    """
    updated_values = list(current_values)
    if csv_file is None:
        return updated_values
    try:
        df = pd.read_csv(csv_file.name)
        # Only the first data row of the CSV is used.
        csv_data = df.iloc[0].to_dict()

        # NOTE(review): from the MEASURES section onward, the list index
        # written is one greater than the component index named in the
        # trailing comment (e.g. "os (index 53)" is written at [54]); the
        # HEADER section is the exception.  TODO: confirm this offset
        # against the actual component order in ui.form_components.

        # ========== HEADER ==========
        updated_values[3] = csv_data.get('run_id', '')        # reportId (index 3)
        updated_values[4] = csv_data.get('timestamp', '')     # reportDatetime (4)
        updated_values[8] = csv_data.get('project_name', '')  # publisher_projectName (8)

        # ========== SYSTEM ==========
        updated_values[54] = csv_data.get('os', '')  # os (index 53)
        updated_values[55] = ""                      # distribution (54)
        updated_values[56] = ""                      # distributionVersion (55)

        # ========== MEASURES ==========
        updated_values[40] = csv_data.get('tracking_mode', '')    # measurementMethod (39)
        updated_values[48] = "kWh"                                # unit (47)
        updated_values[51] = csv_data.get('energy_consumed', '')  # powerConsumption (50)
        # Duration conversion (hours -> seconds).
        if 'duration' in csv_data:
            try:
                hours = float(csv_data['duration'])
                updated_values[52] = str(round(hours * 3600, 2))  # measurementDuration (51)
            except (TypeError, ValueError):
                # Non-numeric duration: leave the field blank.
                updated_values[52] = ""
        updated_values[53] = csv_data.get('timestamp', '')  # measurementDateTime (52)

        # ========== SOFTWARE ==========
        updated_values[57] = "Python"                            # language (56)
        updated_values[58] = csv_data.get('python_version', '')  # version_software (57)

        # ========== INFRASTRUCTURE ==========
        on_cloud = str(csv_data.get('on_cloud', 'No')).lower().strip()
        updated_values[59] = "publicCloud" if on_cloud == "yes" else "onPremise"  # infraType (58)
        # BUG FIX: the cloud fields used to be written at [59]/[60], which
        # immediately clobbered the infraType value set just above.  They
        # now sit at [60]/[61], consistent with the +1 offset used in the
        # rest of this function.
        updated_values[60] = csv_data.get('cloud_provider', '') if on_cloud == "yes" else ""
        updated_values[61] = csv_data.get('cloud_region', '') if on_cloud == "yes" else ""

        # Component logic: prefer GPU info, fall back to CPU.
        gpu_count = int(csv_data.get('gpu_count', 0))
        cpu_count = int(csv_data.get('cpu_count', 0))
        if gpu_count > 0:
            updated_values[62] = str(gpu_count)  # nbComponent (62)
            model = csv_data.get('gpu_model', '')
        elif cpu_count > 0:
            # BUG FIX: removed a stray "u" statement that raised NameError
            # whenever this branch ran (silently aborting the rest of the
            # mapping via the outer except).
            updated_values[62] = str(cpu_count)  # nbComponent (62)
            model = csv_data.get('cpu_model', '')
        else:
            model = ""

        # Memory size (63).  Guarded locally so a non-numeric value does
        # not abort every field below it.
        ram_size = csv_data.get('ram_total_size', '')
        try:
            updated_values[63] = f"{ram_size} GB" if ram_size and float(ram_size) > 0 else ""
        except (TypeError, ValueError):
            updated_values[63] = ""

        # Split model into manufacturer/family/series (64-66).
        if model:
            parts = model.replace("(R)", "").replace("(TM)", "").split()
            updated_values[65] = parts[0] if parts else ""                        # manufacturer_infra (64)
            updated_values[66] = " ".join(parts[1:3]) if len(parts) >= 3 else ""  # family (65)
            updated_values[67] = " ".join(parts[3:]) if len(parts) > 3 else ""    # series (66)
        else:
            updated_values[65] = updated_values[66] = updated_values[67] = ""
        # BUG FIX: share used to be written at [67], clobbering the series
        # value computed just above; moved to [68] per the +1 offset.
        updated_values[68] = ""  # share (67)

        # ========== ENVIRONMENT ==========
        updated_values[69] = csv_data.get('country_name', '')  # country (68)
        updated_values[70] = csv_data.get('latitude', '')      # latitude (69)
        updated_values[71] = csv_data.get('longitude', '')     # longitude (70)
        updated_values[72] = csv_data.get('region', '')        # location (71)
    except Exception as e:
        # Best-effort: report the problem but keep the form usable.
        print(f"CSV Processing Error: {str(e)}")
    return updated_values
# Parse CSV and update form values
csv_upload.change(
fn=parse_csv_and_update_form,
inputs=[csv_upload] + all_form_components,
outputs=all_form_components
)
# Submit and Download Buttons
submit_button = gr.Button("Submit")
output = gr.Textbox(label="Output", lines=1)
json_output = gr.Textbox(visible=False)
file_output = gr.File(label="Downloadable JSON")
# Event Handlers
submit_button.click(
generate_json,
inputs=[
*header_components,
*task_components,
*measures_components,
*system_components,
*software_components,
*infrastructure_components,
*environment_components,
*quality_components,
*hash_components
],
outputs=[output, file_output, json_output]
).then(
update_dataset,
inputs=json_output,
outputs=output
)
print(all_form_components)
print(len(all_form_components))
if __name__ == "__main__":
demo.launch()
print(all_form_components)