Create app.py
app.py
ADDED
@@ -0,0 +1,95 @@
import gradio as gr
import spaces
import os
import torch
import random
import time
import re
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig, TextStreamer
import transformers


# Read the Hugging Face access token from the environment
HF_TOKEN = os.environ.get("HF_TOKEN", None)

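# ZeroGPU note: this .cuda() call runs at import time, before any GPU is
# attached to the Space, so the device printed below is 'cpu'; inside a
# @spaces.GPU-decorated function it would report 'cuda'.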
zero = torch.Tensor([0]).cuda()
print(zero.device) # <-- 'cpu' 🤔

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
peft_model_id = "Imran1/Llama3.1_8b_Dora"
# attn_implementation="flash_attention_2",
model = AutoModelForCausalLM.from_pretrained(model_id, attn_implementation="sdpa", torch_dtype=torch.bfloat16)
model.load_adapter(peft_model_id)
tokenizer = AutoTokenizer.from_pretrained(peft_model_id)
# streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
model.to('cuda')

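# Note: the base checkpoint is gated, so authentication may be required. A
# sketch (recent transformers/huggingface_hub also pick HF_TOKEN up from the
# environment automatically):
# model = AutoModelForCausalLM.from_pretrained(model_id, token=HF_TOKEN, ...)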
# Set pad_token_id if it's not already set
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id

# Define terminators
terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>")
]

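# <|eot_id|> is the Llama 3 end-of-turn marker; listing it alongside
# eos_token_id stops generation at the end of the assistant's turn.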
generation_params = {
    'max_new_tokens': 2000,
    'use_cache': True,
    'do_sample': True,
    'temperature': 0.7,
    'top_p': 0.9,
    # 'top_k': 50,
    'pad_token_id': tokenizer.pad_token_id,
    'eos_token_id': terminators,
}

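# Translation quality is usually judged on fidelity; a more deterministic
# variant (an alternative sketch, not what this Space ships) would be:
# generation_params.update({'do_sample': False, 'temperature': None, 'top_p': None})
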
@spaces.GPU
def inference(query):
    messages = [
        {"role": "system", "content": """You are a highly skilled multilingual AI assistant specializing in banking and finance translations, with a focus on BNK Bank's products and services. Your task is to create and translate banking-related conversations with perfect accuracy, cultural sensitivity, and natural language use across multiple languages: Korean, English, Simplified Chinese, Traditional Chinese, Russian, Uzbek, Japanese, and Vietnamese.
1. Language Expertise:
- Demonstrate native-level fluency in all eight languages.
- language = ["English", "Korean", "Simplified Chinese", "Traditional Chinese", "Russian", "Uzbek", "Japanese", "Vietnamese"]
- Apply nuances, idioms, and cultural contexts specific to each language with precision.
- Ensure that each translation reads as if it were originally written in that language.
2. Banking and Finance Knowledge:
- Exhibit expert-level understanding of banking terminology, products, and services, especially those specific to BNK Bank.
- Maintain perfect consistency in translating specialized banking terms across all languages.
- Pay meticulous attention to Korean banking terms, ensuring they are accurately represented in other languages.
3. BNK Bank-Specific Terminology:
- The following BNK Bank product names and categories MUST BE TRANSLATED CORRECTLY in each target language:
a) 외국인 입출금 통장 종류: Only one 통장, Only one 주니어 통장, 보통예금, 자유저축예금, 뱅크라인 통장, 모아통장
b) 예적금 종류: BNK가을야구정기예금, LIVE정기예금, 저탄소 실천 예금, BNK내맘대로 예금, 가계우대 정기적금, BNK지역사랑 적금, 꿈이룸 적금, 백세청춘실버 적금, 펫적금, 저탄소 실천 적금, 주택청약종합저축, 더(The) 특판 정기예금
c) 체크카드 종류: ZIPL체크, 어디로든 그린체크, 동백전체크카드(후불교통도가능), 2030언택트체크(후불교통카드임), 국민행복체크, 카카오페이체크, 딩딩체크, 해피포인트체크, 마이존그린체크, 마이존체크
d) 신용카드 종류: (피트)캐쉬백카드, B Smart(oh point)카드, BNK 2030플래티늄(Platinum)카드, BNK 부자되세요 아파트카드, BNK 부자되세요 홈쇼핑카드, Y카드, 국민행복카드, 그린카드, 그린카드v2, 글로벌카드 서비스, 다문화사랑카드, 다이렉트 오토 플러스 서비스, 대한항공(Sky-pass) 제휴카드, 딩딩(DingDing)신용카드, 더포인트카드, 매직패스카드, 명예카드, 문화사랑카드, 부비스카드, 비씨TOP카드, 승용차요일제카드, 신용카드겸용마이비(Mybi)카드, 아시아나클럽카드(Asiana Club), 울산광역시승용차요일제카드, 울산사랑카드, 플래티늄(Platinum) 카드, 해피오토카드서비스, 후불교통카드, BNK 프렌즈 신용카드, BNK 부자되세요 더오일카드, 후불하이패스카드, 한모아신용카드, 메가쇼핑 신용카드, 오늘e신용카드, 펫(PET)카드, 다이아몬드(Diamond) 카드, 카드형 온누리상품권, SK OIL&LPG카드, 팟(pod)신용카드, 부산체육사랑카드, 어디로든 그린체크카드, ZipL 신용카드, BNK Simple American Express Blue Business 카드
- Translate these terms accurately and consistently across all languages, providing culturally appropriate explanations or context when necessary.
4. Detect the input language and translate it into the target language.
- Return only the translation, without extra explanation or comments.
- Do not return any extra text.
"""},
        {"role": "user", "content": f"{query}"},
    ]

    tokenized_chat = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to("cuda")
    outputs = model.generate(tokenized_chat, **generation_params)
    decoded_outputs = tokenizer.batch_decode(outputs, skip_special_tokens=False)
    assistant_response = decoded_outputs[0].split("<|start_header_id|>assistant<|end_header_id|>")[-1].strip()
    response_ = assistant_response.replace('<|eot_id|>', "")
    return response_
    # outputs = model.generate(tokenized_chat, **generation_params, streamer=streamer)
    # return outputs
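# A more conventional way to isolate the completion (a sketch, not what this
# file originally did) is to drop the prompt tokens before decoding:
# completion = tokenizer.decode(outputs[0][tokenized_chat.shape[-1]:], skip_special_tokens=True)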

examples = ["Translate ko to en: \n\n 은행원: 안녕하세요! BNK은행입니다. 무엇을 도와드릴까요? 고객: 안녕하세요. 제가 외국인 입출금 통장을 개설하고 싶은데, 필요한 서류가 무엇인지 궁금합니다. 은행원: 외국인 입출금 통장을 개설하시려면 여권, 외국인 등록증, 그리고 주소 증명서가 필요합니다. 고객: 알겠습니다. 통장 개설 후 입금할 때 수수료가 발생하나요? 은행원: 네, 일반적으로 외국인 통장에 대한 입금 수수료는 없습니다. 하지만 다른 통장으로 이체할 경우 수수료가 발생할 수 있습니다. 더욱 궁금한 점이 있으신가요?"]
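# The example above is a ko->en request: a short teller/customer dialogue about
# the documents needed to open a foreigner deposit account and transfer fees.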

def response(message, history):
    text = inference(message)
    return text
    # for i in range(len(text)):
    #     time.sleep(0.0001)
    #     yield text[: i + 1]
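# To stream the reply instead, replace `return text` with the commented yield
# loop above; gr.ChatInterface also accepts generator functions.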
gr.ChatInterface(response, examples=examples).launch()