ruslanmv
/

Medical-Mixtral-7B-v1.5k

Model card Files and versions Community

ruslanmv committed on Apr 18, 2024

Commit

767b092

·

verified ·

1 Parent(s): 089dc39

Update README.md

Files changed (1) hide show

README.md +43 -10

README.md CHANGED Viewed

@@ -18,31 +18,64 @@ The Medical-Mixtral-7B-v1.5k is a fine-tuned Mixtral model for answering medical
 ### Model Sources [optional]
 ## How to Get Started with the Model
 Use the code below to get started with the model.
 ```python
-from transformers import AutoModelForCausalLM, AutoTokenizer
 # Define the name of your fine-tuned model
 finetuned_model = 'ruslanmv/Medical-Mixtral-7B-v1.5k'
 # Load tokenizer
 tokenizer = AutoTokenizer.from_pretrained(finetuned_model, trust_remote_code=True)
-# Load the model with the provided adapter configuration and weights
-model_pretrained = AutoModelForCausalLM.from_pretrained(finetuned_model, trust_remote_code=True, torch_dtype=torch.float16)
-messages = [
-    {'role': 'user', 'content': 'What should I do to reduce my weight gained due to genetic hypothyroidism?'},
-    {'role': 'assistant', 'content': ''},
-]
-input_ids = tokenizer.apply_chat_template(messages, return_tensors='pt').to('cuda')
-outputs = model_pretrained.generate(input_ids, max_new_tokens=500)
-print(tokenizer.decode(outputs[0], skip_special_tokens=True))
 ```

 ### Model Sources [optional]
 ## How to Get Started with the Model
+Installation
+```
+pip install -qU  transformers==4.36.2  datasets python-dotenv peft bitsandbytes accelerate
+```
 Use the code below to get started with the model.
 ```python
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, logging, BitsAndBytesConfig
+import os, torch
 # Define the name of your fine-tuned model
 finetuned_model = 'ruslanmv/Medical-Mixtral-7B-v1.5k'
+# 4-bit NF4 quantization settings (bitsandbytes) with bfloat16 as the compute dtype
+bnb_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_quant_type="nf4",
+    bnb_4bit_compute_dtype=torch.bfloat16,
+    bnb_4bit_use_double_quant=False,
+)
+# Load the fine-tuned model; 4-bit loading is requested via quantization_config only (a separate load_in_4bit kwarg is redundant and rejected by newer transformers)
+model_pretrained = AutoModelForCausalLM.from_pretrained(
+    finetuned_model,
+    quantization_config=bnb_config,
+    torch_dtype=torch.bfloat16,
+    device_map="auto",
+    trust_remote_code=True
+)
 # Load tokenizer
 tokenizer = AutoTokenizer.from_pretrained(finetuned_model, trust_remote_code=True)
+# Set pad_token_id to eos_token_id
+model_pretrained.config.pad_token_id = tokenizer.eos_token_id
+pipe = pipeline(task="text-generation", model=model_pretrained, tokenizer=tokenizer, max_length=100)
+def build_prompt(question):
+    """Wrap *question* in the [INST] ... [/INST] template, prefixed with '@Enlighten.'."""
+    return f"[INST]@Enlighten. {question} [/INST]"
+question = "What does abutment of the nerve root mean?"
+prompt = build_prompt(question)
+# Run the pipeline and keep the first returned sequence
+result = pipe(prompt)[0]
+# Drop the first occurrence of the prompt from the output, then trim surrounding whitespace
+answer = result['generated_text'].replace(prompt, "", 1)
+generated_text = answer.strip()
+print(generated_text)
 ```