from transformers import pipeline, AutoTokenizer
import gradio as gr
from nltk.tokenize import sent_tokenize  # only needed by the commented-out generate_answer helper below
import torch

# TinyLlama fine-tune hosted on the Hugging Face Hub.
model_id = "janny127/autotrain-7qmts-cs1er"
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Build the text-generation pipeline once at startup.
# Named `pipe` so it does not shadow the `pipeline` factory imported above.
pipe = pipeline(
    "text-generation",
    model=model_id,
    tokenizer=tokenizer,
    torch_dtype=torch.float16,
    device_map="auto",
)


def predict(message, history):
    # gr.ChatInterface calls fn(message, history); `history` is unused here
    # because the model is prompted with the latest message only.
    formatted_prompt = f"### Human: {message}### Assistant:"

    # Sample one completion. return_full_text=False drops the echoed prompt
    # so only the assistant's reply appears in the chat window.
    sequences = pipe(
        formatted_prompt,
        do_sample=True,
        top_k=50,
        top_p=0.7,
        num_return_sequences=1,
        repetition_penalty=1.1,
        max_new_tokens=500,
        return_full_text=False,
    )

    final_result = ""
    # Print and accumulate the result.
    for seq in sequences:
        print(f"Result: {seq['generated_text']}")
        final_result += str(seq["generated_text"])
    return final_result


# Launch the chat web interface.
gr.ChatInterface(
    predict,
    title="Tinyllama_chatBot",
    description="Ask TinyLlama any question",
    examples=["How to cook a fish?", "Who is the president of the US now?"],
).launch()

# Alternative: render the same chat interface inside a Blocks layout.
# interface = gr.ChatInterface(
#     fn=predict,
#     stop_btn=None,
# )
# with gr.Blocks() as demo:
#     interface.render()
# demo.launch()

# Alternative generation helper using a ChatML-style prompt. CHAT_EOS_TOKEN_ID
# must be defined before use, e.g. tokenizer.convert_tokens_to_ids("<|im_end|>")
# if the tokenizer defines that token.
# def generate_answer(query, sample_num=3):
#     formatted_prompt = (
#         f"<|im_start|>user\n{query}<|im_end|>\n<|im_start|>assistant\n"
#     )
#     sequences = pipe(
#         formatted_prompt,
#         do_sample=True,
#         top_k=50,
#         top_p=0.9,
#         num_return_sequences=sample_num,
#         repetition_penalty=1.1,
#         max_new_tokens=150,
#         eos_token_id=CHAT_EOS_TOKEN_ID,
#     )
#     # Strip the prompt, then rejoin the first answer sentence by sentence.
#     answers = [seq["generated_text"].replace(formatted_prompt, "") for seq in sequences]
#     sentences = sent_tokenize(answers[0])
#     return " ".join(s.strip() for s in sentences)
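
# --- Optional streaming variant (a minimal sketch, not wired into the app above) ---
# gr.ChatInterface also accepts a generator fn and renders partial text as it is
# yielded. This sketch assumes the pipeline's underlying objects are reachable as
# pipe.model / pipe.tokenizer (true for transformers pipelines); the name
# predict_stream is illustrative and not part of the original script.
#
# from threading import Thread
# from transformers import TextIteratorStreamer
#
# def predict_stream(message, history):
#     formatted_prompt = f"### Human: {message}### Assistant:"
#     inputs = pipe.tokenizer(formatted_prompt, return_tensors="pt").to(pipe.model.device)
#     streamer = TextIteratorStreamer(
#         pipe.tokenizer, skip_prompt=True, skip_special_tokens=True
#     )
#     # Run generation in a background thread; the streamer yields decoded text
#     # chunks as tokens are produced.
#     Thread(
#         target=pipe.model.generate,
#         kwargs=dict(
#             **inputs,
#             streamer=streamer,
#             do_sample=True,
#             top_k=50,
#             top_p=0.7,
#             repetition_penalty=1.1,
#             max_new_tokens=500,
#         ),
#     ).start()
#     partial = ""
#     for chunk in streamer:
#         partial += chunk
#         yield partial
#
# gr.ChatInterface(predict_stream).launch()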