# import streamlit as st
# x = st.slider('Select a value')
# st.write(x, 'squared is', x * x)
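# Command-line demo for the Lunyu-LLM Llama checkpoint: load the tokenizer and
# model, wrap the user's prompt in a chat template, and print the generated reply.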
import sys
import os
import json

import torch
import transformers
assert (
    "LlamaTokenizer" in transformers._import_structure["models.llama"]
), "Please reinstall it: pip uninstall transformers && pip install git+https://github.com/huggingface/transformers.git"
from transformers import LlamaTokenizer, LlamaForCausalLM, GenerationConfig
# from_pretrained expects a Hub repo id, not the full URL.
base_model = "Shangding-Gu/Lunyu-LLM"
tokenizer = LlamaTokenizer.from_pretrained(base_model)
load_8bit = False
device = "cuda" if torch.cuda.is_available() else "cpu"

# The model itself was never instantiated in the original script; a standard
# load of the same checkpoint is assumed here.
model = LlamaForCausalLM.from_pretrained(
    base_model,
    load_in_8bit=load_8bit,
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
)
model.to(device)

# Align special-token ids with the tokenizer.
model.config.pad_token_id = tokenizer.pad_token_id = 0  # unk
model.config.bos_token_id = 1
model.config.eos_token_id = 2
model.eval()  # inference mode: disable dropout

class Call_model():
    def evaluate(self, instruction):
        final_output = self.inference(instruction + "\n\n### Response:")
        return final_output

    def inference(
        self,
        batch_data,
        input=None,
        temperature=1,
        top_p=0.95,
        top_k=40,
        num_beams=1,
        max_new_tokens=4096,
        **kwargs,
    ):
        # Wrap the user text in a Vicuna-style chat prompt.
        prompts = f"""A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. USER: {batch_data} ASSISTANT:"""
        inputs = tokenizer(prompts, return_tensors="pt")
        input_ids = inputs["input_ids"].to(device)
        generation_config = GenerationConfig(
            temperature=temperature,
            top_p=top_p,
            top_k=top_k,
            num_beams=num_beams,
            **kwargs,
        )
        generation_output = model.generate(
            input_ids=input_ids,
            generation_config=generation_config,
            return_dict_in_generate=True,
            output_scores=True,
            max_new_tokens=max_new_tokens,
        )
        s = generation_output.sequences
        output = tokenizer.batch_decode(s, skip_special_tokens=True)
        # Keep only the assistant's reply from the decoded prompt + completion.
        output = output[0].split("ASSISTANT:")[1].strip()
        return output

if __name__ == "__main__":
    prompt = input("Please input:")
    prompt = str(prompt)
    model_evaluate = Call_model()
    prompt_state = model_evaluate.evaluate(prompt)
    print(prompt_state)