# Hugging Face Space: Gradio chat demo for FINGU-AI/Qwen-Orpo-v1.
import gradio as gr
import spaces
import os
import spaces
import torch
import random
import time
import re
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig, TextStreamer
import transformers
# Hugging Face access token from the environment (required for gated/private repos).
HF_TOKEN = os.environ.get("HF_TOKEN", None)

# ZeroGPU probe: outside a @spaces.GPU-decorated call this reports 'cpu';
# kept as a startup sanity check for the Spaces GPU runtime.
zero = torch.Tensor([0]).cuda()
print(zero.device)

model_id = 'FINGU-AI/Qwen-Orpo-v1'

# Bug fix: HF_TOKEN was read but never used — pass it to from_pretrained so
# gated/private checkpoints can actually be downloaded. token=None is a
# no-op for public repositories, so this stays backward-compatible.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    attn_implementation="sdpa",  # flash_attention_2 is an alternative if installed
    torch_dtype=torch.bfloat16,
    token=HF_TOKEN,
)
tokenizer = AutoTokenizer.from_pretrained(model_id, token=HF_TOKEN)
model.to('cuda')

# Sampling configuration shared by every generate() call.
generation_params = {
    'max_new_tokens': 1000,  # hard cap on generated length
    'use_cache': True,       # reuse past key/values for speed
    'do_sample': True,
    'temperature': 0.7,
    'top_p': 0.9,
}
@spaces.GPU
def inference(query):
    """Run one chat turn through the model and return the assistant's reply.

    Args:
        query: The user's message as plain text.

    Returns:
        The decoded assistant response with special tokens stripped.
    """
    messages = [
        {"role": "system", "content": """You are ai trader, invester helpfull assistant."""},
        {"role": "user", "content": f"{query}"},
    ]
    tokenized_chat = tokenizer.apply_chat_template(
        messages, tokenize=True, add_generation_prompt=True, return_tensors="pt"
    ).to("cuda")
    outputs = model.generate(tokenized_chat, **generation_params)
    # Bug fix: decode only the newly generated tokens instead of decoding the
    # full sequence and string-splitting on the "<|im_start|>assistant" marker —
    # the old approach broke whenever that marker text appeared inside the
    # prompt or the reply, and needed a manual '<|im_end|>' patch-out.
    new_tokens = outputs[0][tokenized_chat.shape[-1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
# Seed questions shown beneath the chat box so visitors can try the demo quickly.
examples = [
    'How can options strategies such as straddles, strangles, and spreads be used to hedge against market volatility?',
    'How do changes in interest rates, inflation, and GDP growth impact stock and bond markets?',
    'What are the key components and strategies involved in developing an effective algorithmic trading system?',
    'How can investors integrate environmental, social, and governance (ESG) factors into their investment decisions to achieve both financial returns and social impact?',
    'How do geopolitical events such as trade wars, political instability, and international conflicts affect global financial markets?',
    'How does blockchain technology have the potential to disrupt financial markets and investment practices?',
]
def response(message, history):
    """Gradio ChatInterface callback: delegate to `inference` for the reply.

    `history` is accepted to satisfy the ChatInterface signature but is
    intentionally unused — each turn is answered independently.
    """
    return inference(message)
# Bug fix: removed the trailing ' |' extraction artifact, which was a syntax error.
# Wire the chat callback and example prompts into the UI and start the server.
gr.ChatInterface(response, examples=examples).launch()