FINGU-AI committed on
Commit
a4c7253
•
1 Parent(s): 3ca9f84

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +95 -0
app.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Standard library imports.
import os
import random
import re
import time

# Third-party imports (fix: `spaces` was imported twice in the original).
import gradio as gr
import spaces
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig, TextStreamer
# Read the Hugging Face access token from the environment (the gated
# Meta-Llama base model requires authentication; may be None locally).
HF_TOKEN = os.environ.get("HF_TOKEN", None)

# Probe tensor: on a ZeroGPU Space the GPU is presumably attached only
# inside functions decorated with @spaces.GPU, so at import time the
# device reports 'cpu' per the original author's note — TODO confirm.
zero = torch.Tensor([0]).cuda()
print(zero.device)  # <-- 'cpu'

# Base instruct model plus a PEFT (DoRA) adapter fine-tuned for
# banking-domain translation.
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"
peft_model_id = "Imran1/Llama3.1_8b_Dora"
# attn_implementation="flash_attention_2",
model = AutoModelForCausalLM.from_pretrained(model_id, attn_implementation="sdpa", torch_dtype= torch.bfloat16)
model.load_adapter(peft_model_id)
# Tokenizer is loaded from the adapter repo so any tokens added during
# fine-tuning match the adapter's expectations.
tokenizer = AutoTokenizer.from_pretrained(peft_model_id)
# streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
model.to('cuda')

# Set pad_token_id if it's not already set; fall back to EOS so
# generation does not fail on padding.
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id

# Define terminators: generation stops at EOS or at the Llama-3
# end-of-turn marker <|eot_id|>.
terminators = [
    tokenizer.eos_token_id,
    tokenizer.convert_tokens_to_ids("<|eot_id|>")
]

# Shared sampling settings passed to every model.generate() call.
generation_params = {
    'max_new_tokens': 2000,
    'use_cache': True,
    'do_sample': True,
    'temperature': 0.7,
    'top_p': 0.9,
    # 'top_k': 50,
    'pad_token_id': tokenizer.pad_token_id,
    'eos_token_id': terminators,
}
@spaces.GPU
def inference(query):
    """Translate a banking-related query with the Llama-3 + DoRA model.

    Args:
        query: The user's message, e.g. "Translate ko to en: ...".

    Returns:
        The decoded assistant response with the trailing ``<|eot_id|>``
        marker removed.
    """
    messages = [
        {"role": "system", "content": """You are a highly skilled multilingual AI assistant specializing in banking and finance translations, with a focus on BNK Bank's products and services. Your task is to create and translate banking-related conversations with perfect accuracy, cultural sensitivity, and natural language use across multiple languages: Korean, English, Simplified Chinese, Traditional Chinese, Russian, Uzbek, Japanese, and Vietnamese.
1. Language Expertise:
- Demonstrate native-level fluency in all eight languages.
- language = ["English", "Korean", "Simplified Chinese", "Traditional Chinese", "Russian", "Uzbek", "Japanese", "Vietnamese"]
- Apply nuances, idioms, and cultural contexts specific to each language with precision.
- Ensure that each translation reads as if it were originally written in that language.
2. Banking and Finance Knowledge:
- Exhibit expert-level understanding of banking terminology, products, and services, especially those specific to BNK Bank.
- Maintain perfect consistency in translating specialized banking terms across all languages.
- Pay meticulous attention to Korean banking terms, ensuring they are accurately represented in other languages.
3. BNK Bank-Specific Terminology:
- The following BNK Bank product names and categories MUST BE TRANSLATED CORRECTLY in each target language:
a) ์™ธ๊ตญ์ธ ์ž…์ถœ๊ธˆ ํ†ต์žฅ ์ข…๋ฅ˜: Only one ํ†ต์žฅ, Only one ์ฃผ๋‹ˆ์–ด ํ†ต์žฅ, ๋ณดํ†ต์˜ˆ๊ธˆ, ์ž์œ ์ €์ถ•์˜ˆ๊ธˆ, ๋ฑ…ํฌ๋ผ์ธ ํ†ต์žฅ, ๋ชจ์ž„ํ†ต์žฅ
b) ์˜ˆ์ ๊ธˆ ์ข…๋ฅ˜: BNK๊ฐ€์„์•ผ๊ตฌ์ •๊ธฐ์˜ˆ๊ธˆ, LIVE์ •๊ธฐ์˜ˆ๊ธˆ, ์ €ํƒ„์†Œ ์‹ค์ฒœ ์˜ˆ๊ธˆ, BNK๋‚ด๋ง˜๋Œ€๋กœ ์˜ˆ๊ธˆ, ๊ฐ€๊ณ„์šฐ๋Œ€ ์ •๊ธฐ ์ ๊ธˆ, BNK์ง€์—ญ์‚ฌ๋ž‘ ์ ๊ธˆ, ๊ฟˆ์ด๋ฃธ ์ ๊ธˆ, ๋ฐฑ์„ธ์ฒญ์ถ˜์‹ค๋ฒ„ ์ ๊ธˆ, ํŽซ์ ๊ธˆ, ์ €ํƒ„์†Œ ์‹ค์ฒœ ์ ๊ธˆ, ์ฃผํƒ์ฒญ์•ฝ์ข…ํ•ฉ์ €์ถ•, ๋”(The) ํŠนํŒ ์ •๊ธฐ์˜ˆ๊ธˆ
c) ์ฒดํฌ ์นด๋“œ ์ข…๋ฅ˜: ZIPL์ฒดํฌ, ์–ด๋””๋กœ๋“ ๊ทธ๋ฆฐ์ฒดํฌ, ๋™๋ฐฑ์ „์ฒดํฌ์นด๋“œ(ํ›„๋ถˆ๊ตํ†ต๋„๊ฐ€๋Šฅ), 2030์–ธํƒํŠธ์ฒดํฌ(ํ›„๋ถˆ๊ตํ†ต์นด๋“œ์ž„), ๊ตญ๋ฏผํ–‰๋ณต์ฒดํฌ, ์นด์นด์˜คํŽ˜์ด์ฒดํฌ, ๋”ฉ๋”ฉ์ฒดํฌ, ํ•ดํ”ผํฌ์ธํŠธ์ฒดํฌ, ๋งˆ์ด์กด๊ทธ๋ฆฐ์ฒดํฌ, ๋งˆ์ด์กด์ฒดํฌ
d) ์‹ ์šฉ ์นด๋“œ ์ข…๋ฅ˜: (ํผํ“จ)์บ์‰ฌ๋ฐฑ์นด๋“œ, B Smart(oh point)์นด๋“œ, BNK 2030ํ”Œ๋ž˜ํ‹ฐ๋Š„(Platinum)์นด๋“œ, BNK ๋ถ€์ž๋˜์„ธ์š” ์•„ํŒŒํŠธ์นด๋“œ, BNK ๋ถ€์ž๋˜์„ธ์š” ํ™ˆ์‡ผํ•‘์นด๋“œ, Y์นด๋“œ, ๊ตญ๋ฏผํ–‰๋ณต์นด๋“œ, ๊ทธ๋ฆฐ์นด๋“œ, ๊ทธ๋ฆฐ์นด๋“œv2, ๊ธ€๋กœ๋ฒŒ์นด๋“œ ์„œ๋น„์Šค, ๋‹ค๋ฌธํ™”์‚ฌ๋ž‘์นด๋“œ, ๋‹ค์ด๋ ‰ํŠธ ์˜คํ† ํ”Œ๋Ÿฌ์Šค ์„œ๋น„์Šค, ๋Œ€ํ•œํ•ญ๊ณต(Sky-pass) ์ œํœด์นด๋“œ, ๋”ฉ๋”ฉ(DingDing)์‹ ์šฉ์นด๋“œ, ๋ ˆํฌ์ธ ์นด๋“œ, ๋งค์งํŒจ์Šค์นด๋“œ, ๋ช…์ž‘์นด๋“œ, ๋ฌธํ™”์‚ฌ๋ž‘์นด๋“œ, ๋ถ€๋น…์Šค์นด๋“œ, ๋น„์”จTOP์นด๋“œ, ์Šน์šฉ์ฐจ์š”์ผ์ œ์นด๋“œ, ์‹ ์šฉ์นด๋“œ๊ฒธ์šฉ๋งˆ์ด๋น„(Mybi)์นด๋“œ, ์•„์‹œ์•„๋‚˜ํด๋Ÿฝ์นด๋“œ(Asiana Club), ์šธ์‚ฐ๊ด‘์—ญ์‹œ ์Šน์šฉ์ฐจ์š”์ผ์ œ์นด๋“œ, ์šธ์‚ฐ์‚ฌ๋ž‘์นด๋“œ, ํ”Œ๋ž˜ํ‹ฐ๋Š„(Platinum) ์นด๋“œ, ํ•ดํ”ผ์˜คํ† ์นด๋“œ์„œ๋น„์Šค, ํ›„๋ถˆ๊ตํ†ต์นด๋“œ, BNK ํ”„๋ Œ์ฆˆ ์‹ ์šฉ์นด๋“œ, BNK ๋ถ€์ž๋˜์„ธ์š” ๋”์˜ค์ผ์นด๋“œ, ํ›„๋ถˆํ•˜์ดํŒจ์Šค์นด๋“œ, ํƒ‘๋ชจ์•„์‹ ์šฉ์นด๋“œ, ๋ฉ”๊ฐ€์‡ผํ•‘ ์‹ ์šฉ์นด๋“œ, ์˜ค๋Š˜์€e์‹ ์šฉ์นด๋“œ, ํŽซ(PET)์นด๋“œ, ๋‹ค์ด์•„๋ชฌ๋“œ(Diamond) ์นด๋“œ, ์นด๋“œํ˜• ์˜จ๋ˆ„๋ฆฌ์ƒํ’ˆ๊ถŒ, SK OIL&LPG์นด๋“œ, ํŒŸ(pod)์‹ ์šฉ์นด๋“œ, ๋ถ€์‚ฐ์ฒด์œก์‚ฌ๋ž‘์นด๋“œ, ์–ด๋””๋กœ๋“  ๊ทธ๋ฆฐ์ฒดํฌ์นด๋“œ, ZipL ์‹ ์šฉ์นด๋“œ, BNK Simple American Express Blue Business ์นด๋“œ
- Translate these terms accurately and consistently across all languages, providing culturally appropriate explanations or context when necessary.
4. get input language and translate it inti target language.
- return only translation. without extra explaination and comments.
- do not return extra text.
"""},
        {"role": "user", "content": f"{query}"},
    ]

    # Render the chat template into input ids and move them to the GPU.
    tokenized_chat = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to("cuda")
    outputs = model.generate(tokenized_chat, **generation_params)
    # Keep special tokens: the assistant header below is the split anchor.
    decoded_outputs = tokenizer.batch_decode(outputs, skip_special_tokens=False)
    assistant_response = decoded_outputs[0].split("<|start_header_id|>assistant<|end_header_id|>")[-1].strip()
    # BUG FIX: the original computed this cleaned string but then returned
    # the raw `assistant_response`, leaking '<|eot_id|>' into the UI.
    response_ = assistant_response.replace('<|eot_id|>', "")
    return response_
# Example prompt shown in the Gradio UI: a Korean bank-teller dialogue
# to be translated into English.
examples = ["Translate ko to en: \n\n ์€ํ–‰์›: ์•ˆ๋…•ํ•˜์„ธ์š”! BNK์€ํ–‰์ž…๋‹ˆ๋‹ค. ๋ฌด์—‡์„ ๋„์™€๋“œ๋ฆด๊นŒ์š”? ๊ณ ๊ฐ: ์•ˆ๋…•ํ•˜์„ธ์š”. ์ œ๊ฐ€ ์™ธ๊ตญ์ธ ์ž…์ถœ๊ธˆ ํ†ต์žฅ์„ ๊ฐœ์„คํ•˜๊ณ  ์‹ถ์€๋ฐ, ํ•„์š”ํ•œ ์„œ๋ฅ˜๊ฐ€ ๋ฌด์—‡์ธ์ง€ ๊ถ๊ธˆํ•ฉ๋‹ˆ๋‹ค. ์€ํ–‰์›: ์™ธ๊ตญ์ธ ์ž…์ถœ๊ธˆ ํ†ต์žฅ์„ ๊ฐœ์„คํ•˜์‹œ๋ ค๋ฉด ์—ฌ๊ถŒ, ์™ธ๊ตญ์ธ ๋“ฑ๋ก์ฆ, ๊ทธ๋ฆฌ๊ณ  ์ฃผ์†Œ ์ฆ๋ช…์„œ๊ฐ€ ํ•„์š”ํ•ฉ๋‹ˆ๋‹ค. ๊ณ ๊ฐ: ์•Œ๊ฒ ์Šต๋‹ˆ๋‹ค. ํ†ต์žฅ ๊ฐœ์„ค ํ›„ ์ž…๊ธˆํ•  ๋•Œ ์ˆ˜์ˆ˜๋ฃŒ๊ฐ€ ๋ฐœ์ƒํ•˜๋‚˜์š”? ์€ํ–‰์›: ๋„ค, ์ผ๋ฐ˜์ ์œผ๋กœ ์™ธ๊ตญ์ธ ํ†ต์žฅ์— ๋Œ€ํ•œ ์ž…๊ธˆ ์ˆ˜์ˆ˜๋ฃŒ๋Š” ์—†์Šต๋‹ˆ๋‹ค. ํ•˜์ง€๋งŒ ๋‹ค๋ฅธ ํ†ต์žฅ์œผ๋กœ ์ด์ฒดํ•  ๊ฒฝ์šฐ ์ˆ˜์ˆ˜๋ฃŒ๊ฐ€ ๋ฐœ์ƒํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. ๋”์šฑ ๊ถ๊ธˆํ•œ ์ ์ด ์žˆ์œผ์‹ ๊ฐ€์š”?"]

def response(message, history):
    """Gradio chat callback: translate the latest message, ignoring history."""
    return inference(message)

gr.ChatInterface(response, examples=examples).launch()