Spaces: Runtime error
Update app.py
app.py CHANGED
@@ -6,7 +6,7 @@ import torch
 import random
 import time
 import re
-from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
+from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig, TextStreamer


 # Set an environment variable
@@ -16,9 +16,10 @@ zero = torch.Tensor([0]).cuda()
 print(zero.device) # <-- 'cpu' 🤔


-model_id = 'FINGU-AI/
+model_id = 'FINGU-AI/Qwen-Orpo-v1' #attn_implementation="flash_attention_2",
 model = AutoModelForCausalLM.from_pretrained(model_id,attn_implementation="sdpa", torch_dtype= torch.bfloat16)
 tokenizer = AutoTokenizer.from_pretrained(model_id)
+streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
 model.to('cuda')

 # terminators = [
@@ -38,21 +39,25 @@ generation_params = {
 @spaces.GPU
 def inference(query):
     messages = [
-        {"role": "system", "content": """You are
+        {"role": "system", "content": """You are ai trader, invester helpfull assistant."""},
         {"role": "user", "content": f"{query}"},
     ]

     tokenized_chat = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to("cuda")

-    outputs = model.generate(tokenized_chat, **generation_params)
-
-    assistant_response = decoded_outputs[0].split("Assistant:")[-1].strip()
-    return assistant_response
+    outputs = model.generate(tokenized_chat, **generation_params, streamer=streamer)
+    return outputs

+examples = ['How can options strategies such as straddles, strangles, and spreads be used to hedge against market volatility?',
+            'How do changes in interest rates, inflation, and GDP growth impact stock and bond markets?',
+            'What are the key components and strategies involved in developing an effective algorithmic trading system?',
+            'How can investors integrate environmental, social, and governance (ESG) factors into their investment decisions to achieve both financial returns and social impact?',
+            'How do geopolitical events such as trade wars, political instability, and international conflicts affect global financial markets?',
+            'How does blockchain technology have the potential to disrupt financial markets and investment practices?']

 def response(message, history):
     text = inference(message)
     for i in range(len(text)):
         time.sleep(0.01)
         yield text[: i + 1]
-gr.ChatInterface(response).launch()
+gr.ChatInterface(response,examples=examples).launch()
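A note on context not shown in this diff: both the old and new inference() call model.generate(tokenized_chat, **generation_params), but the generation_params dict is defined in an unchanged part of app.py (the hunk header only reveals generation_params = {). A minimal stand-in, purely an assumption so the sketch further below is self-contained, could look like this:

generation_params = {
    "max_new_tokens": 512,                  # assumed cap; the real value lives outside the diff
    "do_sample": True,                      # assumed sampling setup
    "temperature": 0.7,                     # assumed
    "top_p": 0.9,                           # assumed
    "eos_token_id": tokenizer.eos_token_id,
}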
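Two things are worth flagging about the new version. First, TextStreamer prints tokens to stdout (the Space's logs), not to the chat window. Second, inference() now returns the raw model.generate() output, a tensor of token ids, while response() still slices it like a string with text[: i + 1], so the Gradio UI would receive tensor slices rather than text. One possible way to stream real text into the UI is transformers.TextIteratorStreamer, which exposes decoded chunks as an iterator while generate() runs in a background thread. The sketch below is not the author's code: it assumes the model, tokenizer, generation_params, and examples objects already defined in app.py, and that the installed spaces package accepts generator functions under @spaces.GPU.

from threading import Thread
from transformers import TextIteratorStreamer

@spaces.GPU
def inference(query):
    messages = [
        # Illustrative reworded system prompt, not the one committed in the diff.
        {"role": "system", "content": "You are an AI trading and investing assistant."},
        {"role": "user", "content": query},
    ]
    tokenized_chat = tokenizer.apply_chat_template(
        messages, tokenize=True, add_generation_prompt=True, return_tensors="pt"
    ).to("cuda")

    # TextIteratorStreamer yields decoded text chunks as generation progresses,
    # so this function can stream partial answers instead of returning token ids.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    thread = Thread(
        target=model.generate,
        kwargs=dict(inputs=tokenized_chat, streamer=streamer, **generation_params),
    )
    thread.start()

    partial = ""
    for chunk in streamer:
        partial += chunk
        yield partial
    thread.join()

def response(message, history):
    # inference() is itself a generator here, so just relay its partial outputs.
    yield from inference(message)

gr.ChatInterface(response, examples=examples).launch()

With streaming coming from the model itself, the character-by-character loop with time.sleep(0.01) in the original response() is no longer needed.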