import re

import gradio as gr
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# Repo-local helpers for fetching A-share data and building inference prompts
from Ashare_data import *
from Inference_datapipe import *

# Load the base model and the FinGPT forecaster LoRA adapter
base_model = "meta-llama/Llama-2-7b-chat-hf"
peft_model = "FinGPT/fingpt-forecaster_sz50_llama2-7B_lora"

tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

model = AutoModelForCausalLM.from_pretrained(
    base_model,
    trust_remote_code=True,
    device_map="auto",
    offload_folder="offload/",
)
model = PeftModel.from_pretrained(model, peft_model, offload_folder="offload/")
model = model.eval()


def ask(symbol, weeks_before):
    # Build the prompt from the most recent market data for the given symbol
    info, pt = get_all_prompts_online(symbol=symbol, weeks_before=weeks_before)

    inputs = tokenizer(pt, return_tensors="pt")
    inputs = {key: value.to(model.device) for key, value in inputs.items()}
    print("Inputs loaded onto devices.")

    res = model.generate(
        **inputs,
        max_new_tokens=512,  # bound the response length so the forecast is not cut off
        use_cache=True,
    )
    output = tokenizer.decode(res[0], skip_special_tokens=True)
    # Keep only the model's answer, dropping the echoed prompt up to the [/INST] tag
    answer = re.sub(r".*\[/INST\]\s*", "", output, flags=re.DOTALL)
    return info, answer


server = gr.Interface(
    ask,
    inputs=[
        gr.Textbox(
            label="Symbol",
            value="600519",
            info="Companies from the SZ50 index are recommended",
        ),
        gr.Slider(
            minimum=1,
            maximum=3,
            value=2,
            step=1,
            label="weeks_before",
            info="Due to the token length constraint, a value of 2 is recommended",
        ),
    ],
    outputs=[
        gr.Textbox(label="Information"),
        gr.Textbox(label="Response"),
    ],
    title="FinGPT-Forecaster-Chinese",
    description="""This version makes predictions based on the most recent date. We will upgrade it to support customized dates soon.""",
)

server.launch()