"""
@author: idoia lerchundi
"""
import os
import random
import time

import streamlit as st
from huggingface_hub import InferenceClient
# Load the API token from an environment variable
api_key = os.getenv("HF_TOKEN")
# Instantiate the InferenceClient
client = InferenceClient(api_key=api_key)
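# A small guard (an addition, not in the original app): if HF_TOKEN is unset,
# api_key is None and every request will fail with an authentication error,
# so it is friendlier to surface that in the UI up front.
if api_key is None:
    st.warning("HF_TOKEN is not set; requests to the Inference API will fail.")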
# Streamlit app title
st.title("LM using the HF Inference API (serverless)")
# Ensure the timing variable is initialized in session state
if "elapsed_time" not in st.session_state:
    st.session_state["elapsed_time"] = 0.0

# Ensure the full_text key is initialized in session state
if "full_text" not in st.session_state:
    st.session_state["full_text"] = ""
# Model selection dropdown
model_options = [
    "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    "Qwen/Qwen2.5-1.5B-Instruct",
    "Qwen/Qwen2.5-72B-Instruct",
    "meta-llama/Llama-3.2-3B-Instruct",
    "meta-llama/Llama-3.1-8B-Instruct",
    "meta-llama/Llama-3.2-1B-Instruct",
    "codellama/CodeLlama-34b-Instruct-hf",
]
selected_model = st.selectbox("Choose a model:", model_options)
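# Note (a general caveat, not from the original code): all options are
# instruction-tuned chat models, as the chat.completions endpoint expects,
# but serverless availability varies by model and account tier, so some
# selections may return an error if the model is not currently deployed.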
# Create a text input area for user prompts
with st.form("my_form"):
    text = st.text_area(
        "JOKER (TinyLlama is not great at joke telling.)",
        "Tell me a clever and funny joke in exactly 4 sentences. It should make me laugh really hard. Don't repeat the topic in your joke. Be creative and concise.",
    )
    submitted = st.form_submit_button("Submit")
# Initialize the full response text
full_text = ""

# Generate a random temperature between 0.5 and 1.0 for this submission
temperature = random.uniform(0.5, 1.0)
if submitted:
    messages = [
        {"role": "user", "content": text}
    ]

    # Start timing
    start_time = time.time()

    # Create a new stream for each submission, reusing the temperature
    # sampled above instead of drawing a second value here
    stream = client.chat.completions.create(
        model=selected_model,
        messages=messages,
        temperature=temperature,
        max_tokens=300,
        top_p=random.uniform(0.7, 1.0),
        stream=True,
    )
    # Concatenate chunks to form the full response; the final chunk's delta
    # may carry no content, so guard against None before appending
    for chunk in stream:
        content = chunk.choices[0].delta.content
        if content:
            full_text += content
    # End timing and store the elapsed time in session state so the display
    # below still works on later reruns, when these locals no longer exist
    end_time = time.time()
    st.session_state["elapsed_time"] = end_time - start_time

    # Update session state with the full response
    st.session_state["full_text"] = full_text
# Display the full response and its timing, both read from session state
if st.session_state["full_text"]:
    # Break the stored elapsed time into minutes, seconds, and milliseconds
    minutes, seconds = divmod(st.session_state["elapsed_time"], 60)
    milliseconds = (seconds - int(seconds)) * 1000
    st.info(st.session_state["full_text"])
    st.info(f"Elapsed Time: {int(minutes)} minutes, {int(seconds)} seconds, and {milliseconds:.2f} milliseconds")