---
license: llama3
datasets:
- starmpcc/Asclepius-Synthetic-Clinical-Notes
language:
- en
metrics:
- bleu
- rouge
base_model:
- meta-llama/Meta-Llama-3-8B
new_version: Xlar/orpo-qlora-mtmed-llama3-8b
pipeline_tag: text-generation
library_name: transformers
tags:
- qlora
- orpo
- medical
- reasoning
- multiple-tasks
- clinical-notes
- discharge-summaries
- peft
---
# Model Card for Model ID
<!-- Provide a quick summary of what the model is/does. -->
A Llama-3-8B model fine-tuned with QLoRA and ORPO on synthetic clinical notes (starmpcc/Asclepius-Synthetic-Clinical-Notes) for multiple clinical tasks, such as question answering over clinical notes and discharge summaries. A newer version is available at [Xlar/orpo-qlora-mtmed-llama3-8b](https://huggingface.co/Xlar/orpo-qlora-mtmed-llama3-8b).
### Model Description
<!-- Provide a longer summary of what this model is. -->
- **Developed by:** Xlar @ CBT IITD
- **Funded by [optional]:** HPC IITD
- **Shared by [optional]:** Xlar
- **Model type:** Causal language model (text generation), fine-tuned with QLoRA and ORPO (PEFT)
- **Language(s) (NLP):** English
- **License:** Llama 3 Community License (`llama3`)
- **Finetuned from model [optional]:** `unsloth/llama-3-8b-bnb-4bit` (4-bit quantization of `meta-llama/Meta-Llama-3-8B`)
### Model Sources [optional]
<!-- Provide the basic links for the model. -->
- **Repository:** [More Information Needed]
- **Paper [optional]:** [More Information Needed]
- **Demo [optional]:** [More Information Needed]
## Uses
<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
This model can be used by clinicians or medical researchers as an experimental tool for information retrieval and question answering over clinical notes and discharge summaries, as sketched below.
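As a rough sketch of that workflow, one record from the Asclepius synthetic notes can be turned into a question-answering prompt. The field names below follow the evaluation snippet later in this card and are assumptions about the processed data, not the documented dataset schema.

```python
from datasets import load_dataset

# Field names are assumptions; check the dataset card for
# starmpcc/Asclepius-Synthetic-Clinical-Notes before relying on them.
dataset = load_dataset("starmpcc/Asclepius-Synthetic-Clinical-Notes", split="train")
example = dataset[0]

prompt = (
    "Kindly complete the following task: " + example["Task"] + "\n"
    + example["clinical_note"] + "\n"
    + "question: " + example["question"]
)
print(prompt)
```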
## Bias, Risks, and Limitations
<!-- This section is meant to convey both technical and sociotechnical limitations. -->
This model has not been validated in clinical or hospital settings. It should not be used for real-world diagnosis or treatment decisions without expert oversight.
[More Information Needed]
## How to Get Started with the Model
Use the code below to get started with the model.
```python
from unsloth import FastLanguageModel
import torch

max_seq_length = 2048  # any length; Unsloth handles RoPE scaling internally
dtype = None           # None for auto detection; float16 for Tesla T4/V100, bfloat16 for Ampere+
Model_path = "path/to/your/finetuned/model"  # replace with the model you fine-tuned

inf_model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=Model_path,
    # model_name="unsloth/llama-3-8b-bnb-4bit",  # or start from the 4-bit base model
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=True,  # 4-bit quantization to reduce memory usage; can be False
)
FastLanguageModel.for_inference(inf_model)  # enable Unsloth's native 2x faster inference
```
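For a quick interactive check, the loaded model can be used with the standard `transformers` `TextStreamer`. This is only a minimal sketch; the prompt below is a placeholder, not a validated clinical example.

```python
from transformers import TextStreamer

text_streamer = TextStreamer(tokenizer)
inputs = tokenizer(
    "Summarize the key findings in the following discharge note:\n<clinical note here>",
    return_tensors="pt",
).to(inf_model.device)

# Tokens are printed to stdout as they are generated.
_ = inf_model.generate(**inputs, streamer=text_streamer, max_new_tokens=150)
```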
## Evaluation
<!-- This section describes the evaluation protocols and provides the results. -->
### Generation code

Use the code below to generate an answer for a single evaluation example. Here `example` is one record with `Task`, `clinical_note`, `question`, and `answer` fields, matching the processed evaluation data used for this card.

```python
model_size = sum(t.numel() for t in inf_model.parameters())
print(f"Model size: {model_size/1000**2:.1f}M parameters")

tokenizer.pad_token = tokenizer.eos_token
torch_device = "cuda" if torch.cuda.is_available() else "cpu"

inf_alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context.
Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
"""

instruction = "Kindly complete the following task: " + example['Task']
prompt = example['clinical_note'] + "\n" + "question: " + example['question']
answer = example['answer']  # reference answer, used later for ROUGE/BLEU

text = inf_alpaca_prompt.format(instruction, prompt)
model_inputs = tokenizer(
    text,
    max_length=2048,
    truncation=True,
    padding=False,
    return_tensors="pt",
).to(torch_device)

outputs = inf_model.generate(
    **model_inputs,
    max_new_tokens=150,    # important: without a cap the model produces long, extended text
    num_return_sequences=1,
    # sampling alternatives: do_sample=True, top_k=40, temperature=0.7, top_p=0.95,
    # repetition_penalty=1.1, num_beams=5
)
prediction = tokenizer.decode(outputs[0], skip_special_tokens=True)  # includes the echoed prompt
```
### Testing Data, Factors & Metrics
Code for scoring the generated answers against reference answers with ROUGE and BLEU:

```python
import nltk
# nltk.download('punkt')  # run once if the punkt tokenizer is not installed
from nltk.tokenize import sent_tokenize
import evaluate

rouge = evaluate.load("rouge")
bleu = evaluate.load("bleu")

def compute_metrics(decoded_pred, decoded_label):
    """Compute ROUGE and BLEU for a single prediction/reference string pair."""
    # ROUGE expects a newline after each sentence
    decoded_preds = ["\n".join(sent_tokenize(decoded_pred.strip()))]
    decoded_labels = ["\n".join(sent_tokenize(decoded_label.strip()))]

    result_rouge = rouge.compute(
        predictions=decoded_preds, references=decoded_labels, use_aggregator=True
    )
    result_bleu = None
    try:
        result_bleu = bleu.compute(predictions=decoded_preds, references=decoded_labels)
    except Exception:
        pass  # BLEU can fail on very short or empty predictions
    return result_rouge, result_bleu

# Example
print(compute_metrics("My name is Sanjeet Patil", "My name is Sanjeet"))
```
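To score the model over a whole evaluation split, the generation and metric snippets above can be combined into a simple loop. This is a minimal sketch assuming `eval_dataset` is an iterable of records with the same `Task`/`clinical_note`/`question`/`answer` fields used above; the variable names are placeholders, not part of the released code.

```python
rouge_results, bleu_results = [], []

for example in eval_dataset:  # assumed iterable of evaluation records
    text = inf_alpaca_prompt.format(
        "Kindly complete the following task: " + example["Task"],
        example["clinical_note"] + "\n" + "question: " + example["question"],
    )
    model_inputs = tokenizer(
        text, max_length=2048, truncation=True, return_tensors="pt"
    ).to(torch_device)
    outputs = inf_model.generate(**model_inputs, max_new_tokens=150)

    # Keep only the newly generated tokens (drop the echoed prompt) before scoring.
    prompt_len = model_inputs["input_ids"].shape[1]
    prediction = tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)

    r, b = compute_metrics(prediction, example["answer"])
    rouge_results.append(r)
    if b is not None:
        bleu_results.append(b["bleu"])
```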