# LlamaDutchDemo / app.py
import re
import time

import streamlit as st
import torch
from transformers import pipeline, Conversation, AutoTokenizer
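# NOTE (version assumption): the "conversational" pipeline and the
# Conversation class were deprecated and later removed from transformers;
# this script assumes an older release (roughly the 4.3x series) where they
# still exist. A text-generation fallback is sketched near the bottom.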
print(f"Is CUDA available: {torch.cuda.is_available()}")
# True
print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
# Tesla T4
# Choose your model here by setting model_chosen_id to 1 or 2.
model_chosen_id = 2
model_name_options = {
    1: "meta-llama/Llama-2-13b-chat-hf",
    2: "BramVanroy/Llama-2-13b-chat-dutch",
}
model_chosen = model_name_options[model_chosen_id]

my_config = {
    'model_name': model_chosen,
    'do_sample': True,
    'temperature': 0.1,          # low temperature -> near-deterministic output
    'repetition_penalty': 1.1,   # mildly discourage repeated tokens
    'max_new_tokens': 500,
}
print(f"Selected model: {my_config['model_name']}")
print(f"Parameters are: {my_config}")
def count_words(text):
    # Use a simple regular expression to count words
    words = re.findall(r'\b\w+\b', text)
    return len(words)
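# Quick sanity check of the word counter (a minimal added example; \b\w+\b
# splits on non-word characters, so trailing punctuation is ignored).
assert count_words("Dit is een test.") == 4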
def generate_with_llama_chat(my_config):
    # Read the generation parameters from the config dict, with defaults.
    do_sample = my_config.get('do_sample', True)
    temperature = my_config.get('temperature', 0.1)
    repetition_penalty = my_config.get('repetition_penalty', 1.1)
    max_new_tokens = my_config.get('max_new_tokens', 500)

    start_time = time.time()
    model = my_config['model_name']
    tokenizer = AutoTokenizer.from_pretrained(model)
    chatbot = pipeline(
        "conversational",
        model=model,
        tokenizer=tokenizer,
        do_sample=do_sample,
        temperature=temperature,
        repetition_penalty=repetition_penalty,
        # max_length=2000,
        max_new_tokens=max_new_tokens,
        # Load the weights in 8-bit, spread across the available GPU(s).
        # Passing "src_lang"/"tgt_lang" here does not work for this task.
        model_kwargs={"device_map": "auto", "load_in_8bit": True},
    )
    elapsed_time = time.time() - start_time
    print(f"Loading the model took {elapsed_time:.1f} seconds")
    return chatbot
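# Suggestion (not in the original app): Streamlit reruns this whole script on
# every widget interaction, so wrapping the loader with st.cache_resource
# would keep the 13B model in memory across reruns instead of reloading it:
#
#   @st.cache_resource
#   def load_chatbot():
#       return generate_with_llama_chat(my_config)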
def get_answer(chatbot, input_text):
    start_time = time.time()
    print(f"Processing the input\n {input_text}\n")
    print('Processing the answer....')
    conversation = Conversation(input_text)
    print(f"Conversation(input_text): {conversation}")
    # The pipeline returns the Conversation with the model's reply appended;
    # message 1 is the assistant's answer to the single user message.
    output = chatbot(conversation)[1]['content']
    elapsed_time = time.time() - start_time
    # Append the timing and word-count diagnostics to the answer itself.
    output += f"\nAnswered in {elapsed_time:.1f} seconds, Nr generated words: {count_words(output)}"
    return output
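# Hedged fallback sketch (an addition, not part of the original app): on
# transformers versions that have dropped the "conversational" pipeline,
# roughly the same behaviour can be had with the "text-generation" task plus
# the tokenizer's chat template. Defined here but never called; it assumes
#   generator = pipeline("text-generation", model=..., tokenizer=..., ...)
def get_answer_text_generation(generator, input_text):
    # Format the single user turn with the model's chat template, then
    # generate only the continuation (return_full_text=False drops the prompt).
    messages = [{"role": "user", "content": input_text}]
    prompt = generator.tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    result = generator(prompt, return_full_text=False)
    return result[0]["generated_text"]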
# Load the model (note: Streamlit reruns this script on every interaction).
chatbot = generate_with_llama_chat(my_config)

text = st.text_area("Enter your prompt here.")
if text:
    out = get_answer(chatbot, text)
    st.json(out)
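# To run locally (assuming the dependencies are installed):
#   streamlit run app.py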