ngrigg committed on
Commit
1232177
1 Parent(s): c654f8e
Files changed (2) hide show
  1. app.py +4 -2
  2. llama_models.py +9 -18
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import streamlit as st
2
  import pandas as pd
3
  import asyncio
4
- from llama_models import process_text
5
  from dotenv import load_dotenv
6
  import os
7
 
@@ -10,6 +10,8 @@ load_dotenv()
10
 
11
  # Ensure API key is loaded correctly
12
  api_key = os.getenv("HUGGINGFACE_API_KEY")
 
 
13
  print(f"Hugging Face API Key: {api_key}")
14
 
15
  async def process_csv(file):
@@ -28,7 +30,7 @@ async def process_csv(file):
28
  results = []
29
  for i, desc in enumerate(descriptions_subset):
30
  print(f"Processing description {i+1}/{SAMPLE_SIZE}...")
31
- result = await process_text(model_name, desc)
32
  print(f"Description {i+1} processed. Result: {result[:50]}...") # Print first 50 characters of the result
33
  results.append(result)
34
 
 
1
  import streamlit as st
2
  import pandas as pd
3
  import asyncio
4
+ from llama_models import process_text_local
5
  from dotenv import load_dotenv
6
  import os
7
 
 
10
 
11
  # Ensure API key is loaded correctly
12
  api_key = os.getenv("HUGGINGFACE_API_KEY")
13
+ if api_key is None:
14
+ raise ValueError("Hugging Face API key is not set. Please add it as a secret in your Hugging Face Space settings.")
15
  print(f"Hugging Face API Key: {api_key}")
16
 
17
  async def process_csv(file):
 
30
  results = []
31
  for i, desc in enumerate(descriptions_subset):
32
  print(f"Processing description {i+1}/{SAMPLE_SIZE}...")
33
+ result = await process_text_local(model_name, desc)
34
  print(f"Description {i+1} processed. Result: {result[:50]}...") # Print first 50 characters of the result
35
  results.append(result)
36
 
llama_models.py CHANGED
@@ -1,8 +1,11 @@
1
  import os
2
  from transformers import AutoTokenizer, AutoModelForCausalLM # Ensure correct model class
3
- import aiohttp
4
 
5
  HUGGINGFACE_API_KEY = os.getenv("HUGGINGFACE_API_KEY")
 
 
 
 
6
  model = None
7
  tokenizer = None
8
 
@@ -15,21 +18,9 @@ def load_model(model_name):
15
  print("Model and tokenizer loaded successfully.")
16
  return tokenizer, model
17
 
18
- async def process_text(model_name, text):
19
  tokenizer, model = load_model(model_name)
20
- prompt = f"Given the following company description, extract key products, geographies, and important keywords:\n\n{text}\n\nProducts, geographies, and keywords:"
21
-
22
- async with aiohttp.ClientSession() as session:
23
- print(f"Sending request to model API for text: {text[:50]}...")
24
- async with session.post(f"https://api-inference.huggingface.co/models/{model_name}",
25
- headers={"Authorization": f"Bearer {HUGGINGFACE_API_KEY}"},
26
- json={"inputs": prompt}) as response:
27
- print(f"Received response with status code: {response.status}")
28
- result = await response.json()
29
- print(f"Raw API response: {result}")
30
- if isinstance(result, list) and len(result) > 0:
31
- return result[0].get('generated_text', '').strip()
32
- elif isinstance(result, dict):
33
- return result.get('generated_text', '').strip()
34
- else:
35
- return str(result)
 
1
  import os
2
  from transformers import AutoTokenizer, AutoModelForCausalLM # Ensure correct model class
 
3
 
4
  HUGGINGFACE_API_KEY = os.getenv("HUGGINGFACE_API_KEY")
5
+ if HUGGINGFACE_API_KEY is None:
6
+ raise ValueError("Hugging Face API key is not set. Please add it as a secret in your Hugging Face Space settings.")
7
+ print(f"Using Hugging Face API Key: {HUGGINGFACE_API_KEY}")
8
+
9
  model = None
10
  tokenizer = None
11
 
 
18
  print("Model and tokenizer loaded successfully.")
19
  return tokenizer, model
20
 
21
+ async def process_text_local(model_name, text):
22
  tokenizer, model = load_model(model_name)
23
+ inputs = tokenizer.encode(text, return_tensors="pt")
24
+ outputs = model.generate(inputs, max_length=512)
25
+ result = tokenizer.decode(outputs[0], skip_special_tokens=True)
26
+ return result