Spaces:
Runtime error
Runtime error
add gitignore
Browse files
- .gitignore +1 -0
- app.py +32 -8
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
cache/
|
app.py
CHANGED
@@ -10,16 +10,14 @@ import os
|
|
10 |
import gradio as gr
|
11 |
import requests
|
12 |
import random
|
13 |
-
# from dotenv import load_dotenv
|
14 |
import googletrans
|
15 |
translator = googletrans.Translator()
|
16 |
|
17 |
-
# load_dotenv()
|
18 |
model = None
|
19 |
tokenizer = None
|
20 |
generator = None
|
21 |
|
22 |
-
os.environ["CUDA_VISIBLE_DEVICES"]="
|
23 |
|
24 |
def load_model(model_name, eight_bit=0, device_map="auto"):
|
25 |
global model, tokenizer, generator
|
@@ -32,20 +30,29 @@ def load_model(model_name, eight_bit=0, device_map="auto"):
|
|
32 |
gpu_count = torch.cuda.device_count()
|
33 |
print('gpu_count', gpu_count)
|
34 |
|
|
|
|
|
|
|
|
|
|
|
35 |
print(model_name)
|
36 |
tokenizer = transformers.LLaMATokenizer.from_pretrained(model_name)
|
37 |
model = transformers.LLaMAForCausalLM.from_pretrained(
|
38 |
model_name,
|
39 |
#device_map=device_map,
|
40 |
#device_map="auto",
|
41 |
-
torch_dtype=
|
42 |
#max_memory = {0: "14GB", 1: "14GB", 2: "14GB", 3: "14GB",4: "14GB",5: "14GB",6: "14GB",7: "14GB"},
|
43 |
#load_in_8bit=eight_bit,
|
44 |
#from_tf=True,
|
45 |
low_cpu_mem_usage=True,
|
46 |
load_in_8bit=False,
|
47 |
cache_dir="cache"
|
48 |
-
)
|
|
|
|
|
|
|
|
|
49 |
generator = model.generate
|
50 |
|
51 |
# chat doctor
|
@@ -68,7 +75,11 @@ def chatdoctor(input, state):
|
|
68 |
print('fulltext: ',fulltext)
|
69 |
|
70 |
generated_text = ""
|
71 |
-
gen_in = tokenizer(fulltext, return_tensors="pt").input_ids
|
|
|
|
|
|
|
|
|
72 |
in_tokens = len(gen_in)
|
73 |
print('len token',in_tokens)
|
74 |
with torch.no_grad():
|
@@ -97,9 +108,22 @@ def chatdoctor(input, state):
|
|
97 |
|
98 |
def predict(input, chatbot, state):
|
99 |
print('predict state: ', state)
|
100 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
101 |
response = chatdoctor(en_input, state)
|
102 |
-
|
|
|
|
|
|
|
|
|
|
|
103 |
state.append(response)
|
104 |
chatbot.append((input, ko_response))
|
105 |
return chatbot, state
|
|
|
10 |
import gradio as gr
|
11 |
import requests
|
12 |
import random
|
|
|
13 |
import googletrans
|
14 |
translator = googletrans.Translator()
|
15 |
|
|
|
16 |
model = None
|
17 |
tokenizer = None
|
18 |
generator = None
|
19 |
|
20 |
+
os.environ["CUDA_VISIBLE_DEVICES"]=""
|
21 |
|
22 |
def load_model(model_name, eight_bit=0, device_map="auto"):
|
23 |
global model, tokenizer, generator
|
|
|
30 |
gpu_count = torch.cuda.device_count()
|
31 |
print('gpu_count', gpu_count)
|
32 |
|
33 |
+
if torch.cuda.is_available():
|
34 |
+
torch_dtype = torch.float16
|
35 |
+
else:
|
36 |
+
torch_dtype = torch.float32
|
37 |
+
|
38 |
print(model_name)
|
39 |
tokenizer = transformers.LLaMATokenizer.from_pretrained(model_name)
|
40 |
model = transformers.LLaMAForCausalLM.from_pretrained(
|
41 |
model_name,
|
42 |
#device_map=device_map,
|
43 |
#device_map="auto",
|
44 |
+
torch_dtype=torch_dtype,
|
45 |
#max_memory = {0: "14GB", 1: "14GB", 2: "14GB", 3: "14GB",4: "14GB",5: "14GB",6: "14GB",7: "14GB"},
|
46 |
#load_in_8bit=eight_bit,
|
47 |
#from_tf=True,
|
48 |
low_cpu_mem_usage=True,
|
49 |
load_in_8bit=False,
|
50 |
cache_dir="cache"
|
51 |
+
)
|
52 |
+
if torch.cuda.is_available():
|
53 |
+
model = model.cuda()
|
54 |
+
else:
|
55 |
+
model = model.cpu()
|
56 |
generator = model.generate
|
57 |
|
58 |
# chat doctor
|
|
|
75 |
print('fulltext: ',fulltext)
|
76 |
|
77 |
generated_text = ""
|
78 |
+
gen_in = tokenizer(fulltext, return_tensors="pt").input_ids
|
79 |
+
if torch.cuda.is_available():
|
80 |
+
gen_in = gen_in.cuda()
|
81 |
+
else:
|
82 |
+
gen_in = gen_in.cpu()
|
83 |
in_tokens = len(gen_in)
|
84 |
print('len token',in_tokens)
|
85 |
with torch.no_grad():
|
|
|
108 |
|
109 |
def predict(input, chatbot, state):
|
110 |
print('predict state: ', state)
|
111 |
+
|
112 |
+
# input에 한국어가 detect 되면 영어로 변경, 아니면 그대로
|
113 |
+
is_kor = True
|
114 |
+
if googletrans.Translator().detect(input).lang == 'ko':
|
115 |
+
en_input = translator.translate(input, src='ko', dest='en').text
|
116 |
+
else:
|
117 |
+
en_input = input
|
118 |
+
is_kor = False
|
119 |
+
|
120 |
response = chatdoctor(en_input, state)
|
121 |
+
|
122 |
+
if is_kor:
|
123 |
+
ko_response = translator.translate(response, src='en', dest='ko').text
|
124 |
+
else:
|
125 |
+
ko_response = response
|
126 |
+
|
127 |
state.append(response)
|
128 |
chatbot.append((input, ko_response))
|
129 |
return chatbot, state
|