vdwow commited on
Commit
7f50b6e
·
1 Parent(s): 016c217

feat: update to ecologits 0.5.1 + remove own usage to fix it

Browse files
Files changed (5) hide show
  1. app.py +103 -113
  2. requirements-dev.txt +1 -1
  3. requirements.txt +1 -1
  4. src/scrapper.py +33 -0
  5. src/utils.py +20 -7
app.py CHANGED
@@ -1,13 +1,7 @@
1
  import gradio as gr
2
 
3
- import requests
4
- from bs4 import BeautifulSoup
5
-
6
- import tiktoken
7
-
8
- from ecologits.tracers.utils import compute_llm_impacts, _avg
9
  from ecologits.impacts.llm import compute_llm_impacts as compute_llm_impacts_expert
10
- from ecologits.impacts.llm import IF_ELECTRICITY_MIX_GWP, IF_ELECTRICITY_MIX_ADPE, IF_ELECTRICITY_MIX_PE
11
  from ecologits.model_repository import models
12
 
13
  from src.assets import custom_css
@@ -36,15 +30,17 @@ from src.utils import (
36
  format_energy_eq_physical_activity,
37
  PhysicalActivity,
38
  format_energy_eq_electric_vehicle,
39
- format_gwp_eq_streaming, format_energy_eq_electricity_production, EnergyProduction,
40
- format_gwp_eq_airplane_paris_nyc, format_energy_eq_electricity_consumption_ireland,
 
 
 
41
  df_elec_mix_for_plot
42
  )
 
43
 
44
  CUSTOM = "Custom"
45
 
46
- tokenizer = tiktoken.get_encoding('cl100k_base')
47
-
48
  def model_list(provider: str) -> gr.Dropdown:
49
  if provider == "openai":
50
  return gr.Dropdown(
@@ -86,28 +82,37 @@ def model_list(provider: str) -> gr.Dropdown:
86
  def custom():
87
  return CUSTOM
88
 
89
- def tiktoken_len(text):
90
- tokens = tokenizer.encode(
91
- text,
92
- disallowed_special=()
93
- )
94
- return len(tokens)
95
-
96
  def model_active_params_fn(model_name: str, n_param: float):
97
  if model_name == CUSTOM:
98
  return n_param
99
  provider, model_name = model_name.split('/', 1)
100
  model = models.find_model(provider=provider, model_name=model_name)
101
- return model.active_parameters or _avg(model.active_parameters_range)
102
-
 
 
 
 
 
 
 
 
103
 
104
  def model_total_params_fn(model_name: str, n_param: float):
105
  if model_name == CUSTOM:
106
  return n_param
107
  provider, model_name = model_name.split('/', 1)
108
  model = models.find_model(provider=provider, model_name=model_name)
109
- return model.total_parameters or _avg(model.total_parameters_range)
110
-
 
 
 
 
 
 
 
 
111
 
112
  def mix_fn(country_code: str, mix_adpe: float, mix_pe: float, mix_gwp: float):
113
  if country_code == CUSTOM:
@@ -148,7 +153,7 @@ with gr.Blocks(css=custom_css) as demo:
148
  if provider.startswith("huggingface_hub"):
149
  provider = provider.split("/")[0]
150
  if models.find_model(provider, model) is not None:
151
- impacts = compute_llm_impacts(
152
  provider=provider,
153
  model_name=model,
154
  output_token_count=prompt,
@@ -316,17 +321,17 @@ with gr.Blocks(css=custom_css) as demo:
316
  )
317
  input_mix_gwp = gr.Number(
318
  label="Electricity mix - GHG emissions [kgCO2eq / kWh]",
319
- value=IF_ELECTRICITY_MIX_GWP,
320
  interactive=True
321
  )
322
  input_mix_adpe = gr.Number(
323
  label="Electricity mix - Abiotic resources [kgSbeq / kWh]",
324
- value=IF_ELECTRICITY_MIX_ADPE,
325
  interactive=True
326
  )
327
  input_mix_pe = gr.Number(
328
  label="Electricity mix - Primary energy [MJ / kWh]",
329
- value=IF_ELECTRICITY_MIX_PE,
330
  interactive=True
331
  )
332
 
@@ -423,104 +428,89 @@ with gr.Blocks(css=custom_css) as demo:
423
  x_title=None,
424
  y_title='electricity mix in gCO2eq / kWh')
425
 
426
- with gr.Tab("🔍 Evaluate your own usage"):
427
 
428
- with gr.Row():
429
- gr.Markdown("""
430
- # 🔍 Evaluate your own usage
431
- ⚠️ For now, only ChatGPT conversation import is available.
432
- You can always try out other models - however results might be inaccurate due to fixed parameters, such as tokenization method.
433
- """)
434
 
435
- def process_input(text):
436
-
437
- r = requests.get(text, verify=False)
 
 
 
 
 
 
438
 
439
- soup = BeautifulSoup(r.text, "html.parser")
440
- list_text = str(soup).split('parts":["')
441
- s = ''
442
- for item in list_text[1:int(len(list_text)/2)]:
443
- if list_text.index(item)%2 == 1:
444
- s = s + item.split('"]')[0]
445
 
446
- amout_token = tiktoken_len(s)
447
-
448
- return amout_token
449
-
450
- def compute_own_impacts(amount_token, model):
451
- provider = model.split('/')[0].lower()
452
- model = model.split('/')[1]
453
- impacts = compute_llm_impacts(
454
- provider=provider,
455
- model_name=model,
456
- output_token_count=amount_token,
457
- request_latency=100000
458
- )
459
 
460
- impacts = format_impacts(impacts)
461
-
462
- energy = f"""
463
- <h2 align="center">⚡️ Energy</h2>
464
- $$ \Large {impacts.energy.magnitude:.3g} \ \large {impacts.energy.units} $$
465
- <p align="center"><i>Evaluates the electricity consumption<i></p><br>
466
- """
467
 
468
- gwp = f"""
469
- <h2 align="center">🌍️ GHG Emissions</h2>
470
- $$ \Large {impacts.gwp.magnitude:.3g} \ \large {impacts.gwp.units} $$
471
- <p align="center"><i>Evaluates the effect on global warming<i></p><br>
472
- """
473
 
474
- adp = f"""
475
- <h2 align="center">🪨 Abiotic Resources</h2>
476
- $$ \Large {impacts.adpe.magnitude:.3g} \ \large {impacts.adpe.units} $$
477
- <p align="center"><i>Evaluates the use of metals and minerals<i></p><br>
478
- """
479
-
480
- pe = f"""
481
- <h2 align="center">⛽️ Primary Energy</h2>
482
- $$ \Large {impacts.pe.magnitude:.3g} \ \large {impacts.pe.units} $$
483
- <p align="center"><i>Evaluates the use of energy resources<i></p><br>
484
- """
485
 
486
- return energy, gwp, adp, pe
487
 
488
- def combined_function(text, model):
489
- n_token = process_input(text)
490
- energy, gwp, adp, pe = compute_own_impacts(n_token, model)
491
- return n_token, energy, gwp, adp, pe
492
 
493
- with gr.Blocks():
494
-
495
- text_input = gr.Textbox(label="Paste the URL here (must be on https://chatgpt.com/share/xxxx format)")
496
- model = gr.Dropdown(
497
- MODELS,
498
- label="Model name",
499
- value="openai/gpt-4o",
500
- filterable=True,
501
- interactive=True
502
- )
503
-
504
- process_button = gr.Button("Estimate this usage footprint")
505
 
506
- with gr.Accordion("ℹ️ Infos", open=False):
507
- n_token = gr.Textbox(label="Total amount of tokens :")
508
-
509
- with gr.Row():
510
- with gr.Column(scale=1, min_width=220):
511
- energy = gr.Markdown()
512
- with gr.Column(scale=1, min_width=220):
513
- gwp = gr.Markdown()
514
- with gr.Column(scale=1, min_width=220):
515
- adp = gr.Markdown()
516
- with gr.Column(scale=1, min_width=220):
517
- pe = gr.Markdown()
518
-
519
- process_button.click(
520
- fn=combined_function,
521
- inputs=[text_input, model],
522
- outputs=[n_token, energy, gwp, adp, pe]
523
- )
524
 
525
  with gr.Tab("📖 Methodology"):
526
  gr.Markdown(METHODOLOGY_TEXT,
 
1
  import gradio as gr
2
 
3
+ from ecologits.tracers.utils import llm_impacts, _avg
 
 
 
 
 
4
  from ecologits.impacts.llm import compute_llm_impacts as compute_llm_impacts_expert
 
5
  from ecologits.model_repository import models
6
 
7
  from src.assets import custom_css
 
30
  format_energy_eq_physical_activity,
31
  PhysicalActivity,
32
  format_energy_eq_electric_vehicle,
33
+ format_gwp_eq_streaming,
34
+ format_energy_eq_electricity_production,
35
+ EnergyProduction,
36
+ format_gwp_eq_airplane_paris_nyc,
37
+ format_energy_eq_electricity_consumption_ireland,
38
  df_elec_mix_for_plot
39
  )
40
+ from src.scrapper import process_input
41
 
42
  CUSTOM = "Custom"
43
 
 
 
44
  def model_list(provider: str) -> gr.Dropdown:
45
  if provider == "openai":
46
  return gr.Dropdown(
 
82
  def custom():
83
  return CUSTOM
84
 
 
 
 
 
 
 
 
85
  def model_active_params_fn(model_name: str, n_param: float):
86
  if model_name == CUSTOM:
87
  return n_param
88
  provider, model_name = model_name.split('/', 1)
89
  model = models.find_model(provider=provider, model_name=model_name)
90
+ try: #moe with range
91
+ return model.architecture.parameters.active.max
92
+ except:
93
+ try: #moe without range
94
+ return model.architecture.parameters.active
95
+ except:
96
+ try: #dense with range
97
+ return model.architecture.parameters.max
98
+ except: #dense without range
99
+ return model.architecture.parameters
100
 
101
  def model_total_params_fn(model_name: str, n_param: float):
102
  if model_name == CUSTOM:
103
  return n_param
104
  provider, model_name = model_name.split('/', 1)
105
  model = models.find_model(provider=provider, model_name=model_name)
106
+ try: #moe
107
+ return model.architecture.parameters.total.max
108
+ except:
109
+ try: #dense with range
110
+ return model.architecture.parameters.max
111
+ except: #dense without range
112
+ try:
113
+ return model.architecture.parameters.total
114
+ except:
115
+ return model.architecture.parameters
116
 
117
  def mix_fn(country_code: str, mix_adpe: float, mix_pe: float, mix_gwp: float):
118
  if country_code == CUSTOM:
 
153
  if provider.startswith("huggingface_hub"):
154
  provider = provider.split("/")[0]
155
  if models.find_model(provider, model) is not None:
156
+ impacts = llm_impacts(
157
  provider=provider,
158
  model_name=model,
159
  output_token_count=prompt,
 
321
  )
322
  input_mix_gwp = gr.Number(
323
  label="Electricity mix - GHG emissions [kgCO2eq / kWh]",
324
+ value=find_electricity_mix('WOR')[2],
325
  interactive=True
326
  )
327
  input_mix_adpe = gr.Number(
328
  label="Electricity mix - Abiotic resources [kgSbeq / kWh]",
329
+ value=find_electricity_mix('WOR')[0],
330
  interactive=True
331
  )
332
  input_mix_pe = gr.Number(
333
  label="Electricity mix - Primary energy [MJ / kWh]",
334
+ value=find_electricity_mix('WOR')[1],
335
  interactive=True
336
  )
337
 
 
428
  x_title=None,
429
  y_title='electricity mix in gCO2eq / kWh')
430
 
431
+ # with gr.Tab("🔍 Evaluate your own usage"):
432
 
433
+ # with gr.Row():
434
+ # gr.Markdown("""
435
+ # # 🔍 Evaluate your own usage
436
+ # ⚠️ For now, only ChatGPT conversation import is available.
437
+ # You can always try out other models - however results might be inaccurate due to fixed parameters, such as tokenization method.
438
+ # """)
439
 
440
+ # def compute_own_impacts(amount_token, model):
441
+ # provider = model.split('/')[0].lower()
442
+ # model = model.split('/')[1]
443
+ # impacts = llm_impacts(
444
+ # provider=provider,
445
+ # model_name=model,
446
+ # output_token_count=amount_token,
447
+ # request_latency=100000
448
+ # )
449
 
450
+ # impacts = format_impacts(impacts)
 
 
 
 
 
451
 
452
+ # energy = f"""
453
+ # <h2 align="center">⚡️ Energy</h2>
454
+ # $$ \Large {impacts.energy.magnitude:.3g} \ \large {impacts.energy.units} $$
455
+ # <p align="center"><i>Evaluates the electricity consumption<i></p><br>
456
+ # """
 
 
 
 
 
 
 
 
457
 
458
+ # gwp = f"""
459
+ # <h2 align="center">🌍️ GHG Emissions</h2>
460
+ # $$ \Large {impacts.gwp.magnitude:.3g} \ \large {impacts.gwp.units} $$
461
+ # <p align="center"><i>Evaluates the effect on global warming<i></p><br>
462
+ # """
 
 
463
 
464
+ # adp = f"""
465
+ # <h2 align="center">🪨 Abiotic Resources</h2>
466
+ # $$ \Large {impacts.adpe.magnitude:.3g} \ \large {impacts.adpe.units} $$
467
+ # <p align="center"><i>Evaluates the use of metals and minerals<i></p><br>
468
+ # """
469
 
470
+ # pe = f"""
471
+ # <h2 align="center">⛽️ Primary Energy</h2>
472
+ # $$ \Large {impacts.pe.magnitude:.3g} \ \large {impacts.pe.units} $$
473
+ # <p align="center"><i>Evaluates the use of energy resources<i></p><br>
474
+ # """
 
 
 
 
 
 
475
 
476
+ # return energy, gwp, adp, pe
477
 
478
+ # def combined_function(text, model):
479
+ # n_token = process_input(text)
480
+ # energy, gwp, adp, pe = compute_own_impacts(n_token, model)
481
+ # return n_token, energy, gwp, adp, pe
482
 
483
+ # with gr.Blocks():
484
+
485
+ # text_input = gr.Textbox(label="Paste the URL here (must be on https://chatgpt.com/share/xxxx format)")
486
+ # model = gr.Dropdown(
487
+ # MODELS,
488
+ # label="Model name",
489
+ # value="openai/gpt-4o",
490
+ # filterable=True,
491
+ # interactive=True
492
+ # )
493
+
494
+ # process_button = gr.Button("Estimate this usage footprint")
495
 
496
+ # with gr.Accordion("ℹ️ Infos", open=False):
497
+ # n_token = gr.Textbox(label="Total amount of tokens :")
498
+
499
+ # with gr.Row():
500
+ # with gr.Column(scale=1, min_width=220):
501
+ # energy = gr.Markdown()
502
+ # with gr.Column(scale=1, min_width=220):
503
+ # gwp = gr.Markdown()
504
+ # with gr.Column(scale=1, min_width=220):
505
+ # adp = gr.Markdown()
506
+ # with gr.Column(scale=1, min_width=220):
507
+ # pe = gr.Markdown()
508
+
509
+ # process_button.click(
510
+ # fn=combined_function,
511
+ # inputs=[text_input, model],
512
+ # outputs=[n_token, energy, gwp, adp, pe]
513
+ # )
514
 
515
  with gr.Tab("📖 Methodology"):
516
  gr.Markdown(METHODOLOGY_TEXT,
requirements-dev.txt CHANGED
@@ -1,5 +1,5 @@
1
  gradio
2
- ecologits==0.1.7
3
  pint
4
  beautifulsoup4
5
  requests
 
1
  gradio
2
+ ecologits==0.5.1
3
  pint
4
  beautifulsoup4
5
  requests
requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
- ecologits==0.1.7
2
  pint
3
  beautifulsoup4
4
  requests
 
1
+ ecologits==0.5.1
2
  pint
3
  beautifulsoup4
4
  requests
src/scrapper.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ from bs4 import BeautifulSoup
3
+ import tiktoken
4
+
5
+ tokenizer = tiktoken.get_encoding('cl100k_base')
6
+
7
+ def process_input(text):
8
+
9
+ r = requests.get(text, verify=False)
10
+
11
+ soup = BeautifulSoup(r.text, "html.parser")
12
+ print(soup)
13
+ list_text = str(soup).split('parts":["')
14
+ #print(list_text)
15
+ s = ''
16
+ for item in list_text[1:int(len(list_text)/2)]:
17
+ if list_text.index(item)%2 == 1:
18
+ s = s + item.split('"]')[0]
19
+
20
+ amout_token = tiktoken_len(s)
21
+
22
+ return amout_token
23
+
24
+ def tiktoken_len(text):
25
+ tokens = tokenizer.encode(
26
+ text,
27
+ disallowed_special=()
28
+ )
29
+ return len(tokens)
30
+
31
+ answer = process_input('https://chatgpt.com/share/6737b9b5-56fc-8002-a212-35339f5b1d5a')
32
+
33
+ print(answer)
src/utils.py CHANGED
@@ -3,7 +3,8 @@ from enum import Enum
3
 
4
  import pandas as pd
5
 
6
- from ecologits.impacts.models import Impacts, Energy, GWP, ADPe, PE
 
7
  from pint import UnitRegistry, Quantity
8
 
9
 
@@ -116,12 +117,24 @@ def format_pe(pe: PE) -> Quantity:
116
  return val
117
 
118
  def format_impacts(impacts: Impacts) -> QImpacts:
119
- return QImpacts(
120
- energy=format_energy(impacts.energy),
121
- gwp=format_gwp(impacts.gwp),
122
- adpe=format_adpe(impacts.adpe),
123
- pe=format_pe(impacts.pe)
124
- )
 
 
 
 
 
 
 
 
 
 
 
 
125
 
126
  def format_impacts_expert(impacts: Impacts) -> QImpacts:
127
  return QImpacts(
 
3
 
4
  import pandas as pd
5
 
6
+ from ecologits.impacts.modeling import Impacts, Energy, GWP, ADPe, PE
7
+ from ecologits.tracers.utils import llm_impacts, _avg
8
  from pint import UnitRegistry, Quantity
9
 
10
 
 
117
  return val
118
 
119
  def format_impacts(impacts: Impacts) -> QImpacts:
120
+ try:
121
+ impacts.energy.value = (impacts.energy.value.max + impacts.energy.value.min)/2
122
+ impacts.gwp.value = (impacts.gwp.value.max + impacts.gwp.value.min)/2
123
+ impacts.adpe.value = (impacts.adpe.value.max + impacts.adpe.value.min)/2
124
+ impacts.pe.value = (impacts.pe.value.max + impacts.pe.value.min)/2
125
+ return QImpacts(
126
+ energy=format_energy(impacts.energy),
127
+ gwp=format_gwp(impacts.gwp),
128
+ adpe=format_adpe(impacts.adpe),
129
+ pe=format_pe(impacts.pe)
130
+ )
131
+ except: #when no range
132
+ return QImpacts(
133
+ energy=format_energy(impacts.energy),
134
+ gwp=format_gwp(impacts.gwp),
135
+ adpe=format_adpe(impacts.adpe),
136
+ pe=format_pe(impacts.pe)
137
+ )
138
 
139
  def format_impacts_expert(impacts: Impacts) -> QImpacts:
140
  return QImpacts(