import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel, PeftConfig
def load_peft_model():
    """Load the Falcon-7B-Instruct base model and merge in the fine-tuned QLoRA adapter."""
    peft_model_id = "DioulaD/falcon-7b-instruct-qlora-ge-dq-v2"
    model = AutoModelForCausalLM.from_pretrained(
        "tiiuae/falcon-7b-instruct",
        torch_dtype=torch.bfloat16,
        device_map="auto",
        trust_remote_code=True,
    )
    # Merge the LoRA weights into the base model so inference needs no adapter hooks
    model = PeftModel.from_pretrained(model, peft_model_id)
    model = model.merge_and_unload()
    config = PeftConfig.from_pretrained(peft_model_id)
    tknizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
    tknizer.pad_token = tknizer.eos_token
    return model, tknizer

model, tknizer = load_peft_model()
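
# The "qlora" suffix in the adapter id suggests the base model was fine-tuned
# with 4-bit QLoRA. As an alternative, a minimal sketch of loading the base
# model quantized via bitsandbytes (assumes the bitsandbytes package is
# installed; the quantization settings below are illustrative, not the
# original training configuration). merge_and_unload() is skipped here
# because merging into quantized weights is not supported.
def load_peft_model_4bit():
    peft_model_id = "DioulaD/falcon-7b-instruct-qlora-ge-dq-v2"
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
    )
    model = AutoModelForCausalLM.from_pretrained(
        "tiiuae/falcon-7b-instruct",
        quantization_config=bnb_config,
        device_map="auto",
        trust_remote_code=True,
    )
    return PeftModel.from_pretrained(model, peft_model_id)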
def get_expectations(prompt):
    """
    Convert a natural language query into Great Expectations method calls
    using the fine-tuned Falcon-7B model.

    Params:
        prompt : Natural language query describing the expectation

    Uses the module-level `model` and `tknizer` loaded above.
    """
    try:
        # If CUDA support is not available, encoding will silently fail when
        # "cuda:0" is hardcoded, so pick the device dynamically.
        device = "cuda:0" if torch.cuda.is_available() else "cpu"
        encoding = tknizer(prompt, return_tensors="pt").to(device)
        with torch.inference_mode():
            out = model.generate(
                input_ids=encoding.input_ids,
                attention_mask=encoding.attention_mask,
                max_new_tokens=100,
                do_sample=True,
                temperature=0.3,
                eos_token_id=tknizer.eos_token_id,
                top_k=0,
            )
        response = tknizer.decode(out[0], skip_special_tokens=True)
        # The model echoes the prompt on the first line; the generated
        # expectation is on the second.
        return response.split("\n")[1]
    except Exception as e:
        print("An error occurred:", e)
        return f"An error occurred: {e}"
iface = gr.Interface(fn=get_expectations, inputs="text", outputs="text")
iface.launch()