alfredplpl committed on
Commit
44c960c
·
verified ·
1 Parent(s): 377528c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -10
app.py CHANGED
@@ -46,7 +46,7 @@ h1 {
46
  # Load the tokenizer and model
47
  tokenizer = AutoTokenizer.from_pretrained("llm-jp/llm-jp-13b-instruct-full-ac_001_16x-dolly-ichikara_004_001_single-oasst-oasst2-v2.0")
48
  model = AutoModelForCausalLM.from_pretrained("llm-jp/llm-jp-13b-instruct-full-ac_001_16x-dolly-ichikara_004_001_single-oasst-oasst2-v2.0", device_map="auto", torch_dtype=torch.bfloat16)
49
- #model=model.eval()
50
 
51
  @spaces.GPU()
52
  def chat_llm_jp_v2(message: str,
@@ -71,15 +71,14 @@ def chat_llm_jp_v2(message: str,
71
  conversation.append({"role": "user", "content": message})
72
 
73
  tokenized_input = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, tokenize=True, return_tensors="pt").to(model.device)
74
- with torch.no_grad():
75
- output = model.generate(
76
- tokenized_input,
77
- max_new_tokens=max_new_tokens,
78
- do_sample=True,
79
- top_p=0.95,
80
- temperature=temperature,
81
- repetition_penalty=1.05,
82
- )[0]
83
  return tokenizer.decode(output)
84
 
85
 
 
46
  # Load the tokenizer and model
47
  tokenizer = AutoTokenizer.from_pretrained("llm-jp/llm-jp-13b-instruct-full-ac_001_16x-dolly-ichikara_004_001_single-oasst-oasst2-v2.0")
48
  model = AutoModelForCausalLM.from_pretrained("llm-jp/llm-jp-13b-instruct-full-ac_001_16x-dolly-ichikara_004_001_single-oasst-oasst2-v2.0", device_map="auto", torch_dtype=torch.bfloat16)
49
+ model=model.eval()
50
 
51
  @spaces.GPU()
52
  def chat_llm_jp_v2(message: str,
 
71
  conversation.append({"role": "user", "content": message})
72
 
73
  tokenized_input = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, tokenize=True, return_tensors="pt").to(model.device)
74
+ output = model.generate(
75
+ tokenized_input,
76
+ max_new_tokens=max_new_tokens,
77
+ do_sample=True,
78
+ top_p=0.95,
79
+ temperature=temperature,
80
+ repetition_penalty=1.05,
81
+ )[0]
 
82
  return tokenizer.decode(output)
83
 
84