---
license: llama3
datasets:
- starmpcc/Asclepius-Synthetic-Clinical-Notes
language:
- en
metrics:
- bleu
- rouge
base_model:
- meta-llama/Meta-Llama-3-8B
new_version: Xlar/orpo-qlora-mtmed-llama3-8b
pipeline_tag: text-generation
library_name: transformers
tags:
- qlora
- orpo
- medical
- reasoning
- multiple
- tasks
- clinical
- notes
- discharge
- summaries
- peft
---

# Model Card for Model ID

This model card aims to be a base template for new models. It has been generated using [this raw template](https://github.com/huggingface/huggingface_hub/blob/main/src/huggingface_hub/templates/modelcard_template.md?plain=1).

### Model Description

- **Developed by:** Xlar @ CBT IITD
- **Funded by [optional]:** HPC IITD
- **Shared by [optional]:** Xlar
- **Model type:** []
- **Language(s) (NLP):** English
- **License:** llama3
- **Finetuned from model [optional]:** "unsloth/llama-3-8b-bnb-4bit"

### Model Sources [optional]

- **Repository:** [More Information Needed]
- **Paper [optional]:** [More Information Needed]
- **Demo [optional]:** [More Information Needed]

## Uses

This model can be used by clinicians and medical professionals as a trial implementation of an LLM for information retrieval from clinical notes.

## Bias, Risks, and Limitations

It has not been tested in hospital settings.

[More Information Needed]

## How to Get Started with the Model

Use the code below to get started with the model.

```python
from unsloth import FastLanguageModel
import torch

max_seq_length = 2048  # Choose any! We auto support RoPE Scaling internally!
dtype = None  # None for auto detection. Float16 for Tesla T4, V100; Bfloat16 for Ampere+
# load_in_4bit = True  # Use 4bit quantization to reduce memory usage. Can be False.

Model_path = "Xlar/orpo-qlora-mtmed-llama3-8b"  # or a local path to your fine-tuned checkpoint

inf_model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = Model_path,  # the model you used for training
    # model_name = "unsloth/llama-3-8b-bnb-4bit",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = True,
)
FastLanguageModel.for_inference(inf_model)  # Enable native 2x faster inference
# text_streamer = TextStreamer(tokenizer)
```

## Evaluation

Use this code for evaluation.

```python
import csv

model_size = sum(t.numel() for t in inf_model.parameters())
print(f"Model size: {model_size/1000**2:.1f}M parameters")

tokenizer.pad_token = tokenizer.eos_token
torch_device = "cuda" if torch.cuda.is_available() else "cpu"

inf_alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
"""

# `example` is one record of the evaluation set, with Task / clinical_note / question / answer fields.
Instruction = "Kindly complete the following task :" + example['Task']
prompt = example['clinical_note'] + "\n" + 'question:' + example['question']
answer = example['answer']

text = inf_alpaca_prompt.format(Instruction, prompt)
model_inputs = tokenizer(
    text,
    max_length=2048,
    truncation=True,
    padding=False,
    return_tensors="pt",
)
model_inputs.to(torch_device)

outputs = inf_model.generate(
    **model_inputs,
    # min_new_tokens = 50,
    max_new_tokens = 150,  # very important, otherwise the model outputs a lot of extended text
    num_return_sequences = 1,
    # do_sample=True,
    # top_k = 40,
    # temperature=0.7,
    # top_p=0.95,
    # repetition_penalty = 1.1,
    # no_repeat_ngram_size = 0,
    # num_beams=5,
)  # disable sampling to test if batching affects output
output = outputs[0]
```

### Testing Data, Factors & Metrics

```python
# Code for evaluating the generation on the ROUGE and BLEU metrics
import numpy as np
import nltk
from nltk.tokenize import sent_tokenize
import evaluate
# nltk.download('punkt')

rouge = evaluate.load("rouge")  # datasets.load_metric is deprecated; use evaluate instead
bleu = evaluate.load("bleu")

decoded_preds = ["My name is Sanjeet Patil"]
decoded_labels = ["My name is Sanjeet"]
# result = rouge.compute(predictions=decoded_preds, references=decoded_labels, use_aggregator=True)
# print(result)

def compute_metrics(decoded_preds, decoded_labels):
    # Takes a single prediction string and a single reference string.
    # If decoding from token ids, replace -100 in the labels first, since it can't be decoded.
    # ROUGE expects a newline after each sentence.
    decoded_preds = ["\n".join(sent_tokenize(decoded_preds.strip()))]
    decoded_labels = ["\n".join(sent_tokenize(decoded_labels.strip()))]

    result_rouge = rouge.compute(predictions=decoded_preds, references=decoded_labels, use_aggregator=True)
    try:
        result_bleu = bleu.compute(predictions=decoded_preds, references=decoded_labels)
    except Exception:
        result_bleu = None  # BLEU can fail on empty predictions
    return result_rouge, result_bleu
```