"""Gradio chat demo that translates banking conversations.

Loads ``Qwen/Qwen2.5-14B-Instruct`` together with the
``Imran1/Qwen2.5-14b-bnk-lora-11`` adapter and serves a chat UI in which every
user message is sent to the model behind a fixed system prompt.
"""

import os
import random
import re
import time

import gradio as gr
import spaces
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig, TextStreamer

# Hugging Face access token read from the environment (None when it is unset).
HF_TOKEN = os.environ.get("HF_TOKEN", None)

# Kept from the original script: allocate a tensor on CUDA at import time and
# print its device. The original note on the print read: <-- 'cpu'
zero = torch.Tensor([0]).cuda()
print(zero.device)

model_id = "Qwen/Qwen2.5-14B-Instruct"
peft_model_id = "Imran1/Qwen2.5-14b-bnk-lora-11"

# Base model in bfloat16 with SDPA attention; the adapter is attached and
# enabled on top of it. The tokenizer is loaded from the adapter repository.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    attn_implementation="sdpa",
    torch_dtype=torch.bfloat16,
)
model.load_adapter(peft_model_id)
model.enable_adapters()
tokenizer = AutoTokenizer.from_pretrained(peft_model_id)
model.to('cuda')

# Fall back to the EOS token when the tokenizer defines no padding token.
if tokenizer.pad_token_id is None:
    tokenizer.pad_token_id = tokenizer.eos_token_id

# Sampling settings forwarded unchanged to ``model.generate``.
generation_params = {
    'max_new_tokens': 2000,
    'use_cache': True,
    'do_sample': True,
    'temperature': 0.7,
    'top_p': 0.9,
}

# System prompt sent with every request. The text is reproduced exactly as it
# appears in the source (including its spelling), because changing the prompt
# changes what the model sees.
# NOTE(review): the non-ASCII runs below look like UTF-8 Korean text that was
# decoded with the wrong codec -- confirm the encoding of the original file.
SYSTEM_PROMPT = """You are a highly skilled multilingual AI assistant specializing in banking and finance translations, with a focus on BNK Bank's products and services. Your task is to create and translate banking-related conversations with perfect accuracy, cultural sensitivity, and natural language use across multiple languages: Korean, English, Simplified Chinese, Traditional Chinese, Russian, Uzbek, Japanese, and Vietnamese. 1. Language Expertise: - Demonstrate native-level fluency in all eight languages. 
- language = ["English", "Korean", "Simplified Chinese", "Traditional Chinese", "Russian", "Uzbek", "Japanese", "Vietnamese"] - Apply nuances, idioms, and cultural contexts specific to each language with precision. - Ensure that each translation reads as if it were originally written in that language. 2. Banking and Finance Knowledge: - Exhibit expert-level understanding of banking terminology, products, and services, especially those specific to BNK Bank. - Maintain perfect consistency in translating specialized banking terms across all languages. - Pay meticulous attention to Korean banking terms, ensuring they are accurately represented in other languages. 3. BNK Bank-Specific Terminology: - The following BNK Bank product names and categories MUST BE TRANSLATED CORRECTLY in each target language: a) ์™ธ๊ตญ์ธ ์ž…์ถœ๊ธˆ ํ†ต์žฅ ์ข…๋ฅ˜: Only one ํ†ต์žฅ, Only one ์ฃผ๋‹ˆ์–ด ํ†ต์žฅ, ๋ณดํ†ต์˜ˆ๊ธˆ, ์ž์œ ์ €์ถ•์˜ˆ๊ธˆ, ๋ฑ…ํฌ๋ผ์ธ ํ†ต์žฅ, ๋ชจ์ž„ํ†ต์žฅ b) ์˜ˆ์ ๊ธˆ ์ข…๋ฅ˜: BNK๊ฐ€์„์•ผ๊ตฌ์ •๊ธฐ์˜ˆ๊ธˆ, LIVE์ •๊ธฐ์˜ˆ๊ธˆ, ์ €ํƒ„์†Œ ์‹ค์ฒœ ์˜ˆ๊ธˆ, BNK๋‚ด๋ง˜๋Œ€๋กœ ์˜ˆ๊ธˆ, ๊ฐ€๊ณ„์šฐ๋Œ€ ์ •๊ธฐ ์ ๊ธˆ, BNK์ง€์—ญ์‚ฌ๋ž‘ ์ ๊ธˆ, ๊ฟˆ์ด๋ฃธ ์ ๊ธˆ, ๋ฐฑ์„ธ์ฒญ์ถ˜์‹ค๋ฒ„ ์ ๊ธˆ, ํŽซ์ ๊ธˆ, ์ €ํƒ„์†Œ ์‹ค์ฒœ ์ ๊ธˆ, ์ฃผํƒ์ฒญ์•ฝ์ข…ํ•ฉ์ €์ถ•, ๋”(The) ํŠนํŒ ์ •๊ธฐ์˜ˆ๊ธˆ c) ์ฒดํฌ ์นด๋“œ ์ข…๋ฅ˜: ZIPL์ฒดํฌ, ์–ด๋””๋กœ๋“ ๊ทธ๋ฆฐ์ฒดํฌ, ๋™๋ฐฑ์ „์ฒดํฌ์นด๋“œ(ํ›„๋ถˆ๊ตํ†ต๋„๊ฐ€๋Šฅ), 2030์–ธํƒํŠธ์ฒดํฌ(ํ›„๋ถˆ๊ตํ†ต์นด๋“œ์ž„), ๊ตญ๋ฏผํ–‰๋ณต์ฒดํฌ, ์นด์นด์˜คํŽ˜์ด์ฒดํฌ, ๋”ฉ๋”ฉ์ฒดํฌ, ํ•ดํ”ผํฌ์ธํŠธ์ฒดํฌ, ๋งˆ์ด์กด๊ทธ๋ฆฐ์ฒดํฌ, ๋งˆ์ด์กด์ฒดํฌ d) ์‹ ์šฉ ์นด๋“œ ์ข…๋ฅ˜: (ํผํ“ธ)์บ์‰ฌ๋ฐฑ์นด๋“œ, B Smart(oh point)์นด๋“œ, BNK 2030ํ”Œ๋ž˜ํ‹ฐ๋Š„(Platinum)์นด๋“œ, BNK ๋ถ€์ž๋˜์„ธ์š” ์•„ํŒŒํŠธ์นด๋“œ, BNK ๋ถ€์ž๋˜์„ธ์š” ํ™ˆ์‡ผํ•‘์นด๋“œ, Y์นด๋“œ, ๊ตญ๋ฏผํ–‰๋ณต์นด๋“œ, ๊ทธ๋ฆฐ์นด๋“œ, ๊ทธ๋ฆฐ์นด๋“œv2, ๊ธ€๋กœ๋ฒŒ์นด๋“œ ์„œ๋น„์Šค, ๋‹ค๋ฌธํ™”์‚ฌ๋ž‘์นด๋“œ, ๋‹ค์ด๋ ‰ํŠธ ์˜คํ† ํ”Œ๋Ÿฌ์Šค ์„œ๋น„์Šค, ๋Œ€ํ•œํ•ญ๊ณต(Sky-pass) ์ œํœด์นด๋“œ, ๋”ฉ๋”ฉ(DingDing)์‹ ์šฉ์นด๋“œ, ๋ ˆํฌ์ธ ์นด๋“œ, ๋งค์งํŒจ์Šค์นด๋“œ, ๋ช…์ž‘์นด๋“œ, 
๋ฌธํ™”์‚ฌ๋ž‘์นด๋“œ, ๋ถ€๋น…์Šค์นด๋“œ, ๋น„์”จTOP์นด๋“œ, ์Šน์šฉ์ฐจ์š”์ผ์ œ์นด๋“œ, ์‹ ์šฉ์นด๋“œ๊ฒธ์šฉ๋งˆ์ด๋น„(Mybi)์นด๋“œ, ์•„์‹œ์•„๋‚˜ํด๋Ÿฝ์นด๋“œ(Asiana Club), ์šธ์‚ฐ๊ด‘์—ญ์‹œ ์Šน์šฉ์ฐจ์š”์ผ์ œ์นด๋“œ, ์šธ์‚ฐ์‚ฌ๋ž‘์นด๋“œ, ํ”Œ๋ž˜ํ‹ฐ๋Š„(Platinum) ์นด๋“œ, ํ•ดํ”ผ์˜คํ† ์นด๋“œ์„œ๋น„์Šค, ํ›„๋ถˆ๊ตํ†ต์นด๋“œ, BNK ํ”„๋ Œ์ฆˆ ์‹ ์šฉ์นด๋“œ, BNK ๋ถ€์ž๋˜์„ธ์š” ๋”์˜ค์ผ์นด๋“œ, ํ›„๋ถˆํ•˜์ดํŒจ์Šค์นด๋“œ, ํƒ‘๋ชจ์•„์‹ ์šฉ์นด๋“œ, ๋ฉ”๊ฐ€์‡ผํ•‘ ์‹ ์šฉ์นด๋“œ, ์˜ค๋Š˜์€e์‹ ์šฉ์นด๋“œ, ํŽซ(PET)์นด๋“œ, ๋‹ค์ด์•„๋ชฌ๋“œ(Diamond) ์นด๋“œ, ์นด๋“œํ˜• ์˜จ๋ˆ„๋ฆฌ์ƒํ’ˆ๊ถŒ, SK OIL&LPG์นด๋“œ, ํŒŸ(pod)์‹ ์šฉ์นด๋“œ, ๋ถ€์‚ฐ์ฒด์œก์‚ฌ๋ž‘์นด๋“œ, ์–ด๋””๋กœ๋“  ๊ทธ๋ฆฐ์ฒดํฌ์นด๋“œ, ZipL ์‹ ์šฉ์นด๋“œ, BNK Simple American Express Blue Business ์นด๋“œ - Translate these terms accurately and consistently across all languages, providing culturally appropriate explanations or context when necessary. 4. get input language and translate it inti target language. - return only translation. without extra explaination and comments. - do not return extra text. """


# NOTE(review): ``spaces.GPU`` presumably requests a GPU for the duration of
# each call when running on a ZeroGPU Space -- confirm against the spaces docs.
@spaces.GPU
def inference(query):
    """Run one translation request through the model.

    Args:
        query: The user's message; it is formatted into the "user" turn that
            follows the fixed system prompt.

    Returns:
        The decoded text of the newly generated tokens only (the prompt
        tokens are sliced off and special tokens are skipped).
    """
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": f"{query}"},
    ]
    # return_dict=True yields both input_ids and attention_mask, so generate()
    # receives an explicit mask instead of having to infer one.
    model_inputs = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to("cuda")
    outputs = model.generate(**model_inputs, **generation_params)

    # generate() returns prompt + completion; keep only the completion tokens.
    prompt_length = model_inputs["input_ids"].shape[-1]
    completion_ids = outputs[0][prompt_length:]
    return tokenizer.decode(completion_ids, skip_special_tokens=True)


# Single example request offered in the chat UI.
examples = [
    "Translate ko to en: \n\n ์€ํ–‰์›: ์•ˆ๋…•ํ•˜์„ธ์š”! BNK์€ํ–‰์ž…๋‹ˆ๋‹ค. ๋ฌด์—‡์„ ๋„์™€๋“œ๋ฆด๊นŒ์š”? ๊ณ ๊ฐ: ์•ˆ๋…•ํ•˜์„ธ์š”. ์ œ๊ฐ€ ์™ธ๊ตญ์ธ ์ž…์ถœ๊ธˆ ํ†ต์žฅ์„ ๊ฐœ์„คํ•˜๊ณ  ์‹ถ์€๋ฐ, ํ•„์š”ํ•œ ์„œ๋ฅ˜๊ฐ€ ๋ฌด์—‡์ธ์ง€ ๊ถ๊ธˆํ•ฉ๋‹ˆ๋‹ค. \n"
    "์€ํ–‰์›: ์™ธ๊ตญ์ธ ์ž…์ถœ๊ธˆ ํ†ต์žฅ์„ ๊ฐœ์„คํ•˜์‹œ๋ ค๋ฉด ์—ฌ๊ถŒ, ์™ธ๊ตญ์ธ ๋“ฑ๋ก์ฆ, ๊ทธ๋ฆฌ๊ณ  ์ฃผ์†Œ ์ฆ๋ช…์„œ๊ฐ€ ํ•„์š”ํ•ฉ๋‹ˆ๋‹ค. ๊ณ ๊ฐ: ์•Œ๊ฒ ์Šต๋‹ˆ๋‹ค. ํ†ต์žฅ ๊ฐœ์„ค ํ›„ ์ž…๊ธˆํ•  ๋•Œ ์ˆ˜์ˆ˜๋ฃŒ๊ฐ€ ๋ฐœ์ƒํ•˜๋‚˜์š”? ์€ํ–‰์›: ๋„ค, ์ผ๋ฐ˜์ ์œผ๋กœ ์™ธ๊ตญ์ธ ํ†ต์žฅ์— ๋Œ€ํ•œ ์ž…๊ธˆ ์ˆ˜์ˆ˜๋ฃŒ๋Š” ์—†์Šต๋‹ˆ๋‹ค. ํ•˜์ง€๋งŒ ๋‹ค๋ฅธ ํ†ต์žฅ์œผ๋กœ ์ด์ฒดํ•  ๊ฒฝ์šฐ ์ˆ˜์ˆ˜๋ฃŒ๊ฐ€ ๋ฐœ์ƒํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. ๋”์šฑ ๊ถ๊ธˆํ•œ ์ ์ด ์žˆ์œผ์‹ ๊ฐ€์š”?"
]


def response(message, history):
    """Chat callback handed to ``gr.ChatInterface``.

    Args:
        message: The latest user message.
        history: Accepted because the callback is invoked with it, but not
            used; each message is processed on its own.

    Returns:
        The text produced by ``inference`` for ``message``.
    """
    return inference(message)


gr.ChatInterface(response, examples=examples).launch()