# Uploaded by tosi-n7.
# Commit message: "Upload folder using huggingface_hub"
# Commit: d8ffdc4
import pandas as pd
import os
import torch
from datasets import load_dataset
from transformers import (
AutoModelForCausalLM,
AutoTokenizer,
BitsAndBytesConfig,
HfArgumentParser,
TrainingArguments,
pipeline,
logging,
)
from peft import LoraConfig, PeftModel, get_peft_model
from trl import SFTTrainer
from guardrail.client import (
run_metrics,
run_simple_metrics,
create_dataset)
import src.config
# from model import load_model
def text_gen_eval_wrapper(model, tokenizer, prompt, model_id=1, show_metrics=True, temp=0.7, max_length=200):
    """
    Run a text-generation pipeline on a prompt and optionally evaluate the output.

    The prompt is wrapped as ``<s>[INST] {prompt} [/INST]`` before being passed
    to the pipeline, and everything up to and including the first ``[/INST]``
    marker is removed from the returned text.

    Parameters:
        model (str or object): The model name or the initialized text generation model.
        tokenizer (str or object): The tokenizer name or the initialized tokenizer for the model.
        prompt (str): The input prompt text for text generation.
        model_id (int, optional): An identifier for the model, forwarded to
            ``run_metrics``. Defaults to 1.
        show_metrics (bool, optional): Whether to calculate and return evaluation
            metrics. Defaults to True.
        temp (float, optional): Sampling temperature passed to the pipeline.
            Defaults to 0.7.
        max_length (int, optional): The maximum length of the generated text sequence.
            Defaults to 200.

    Returns:
        str: The generated response, when ``show_metrics`` is False.
        tuple: ``(generated response, metrics)`` when ``show_metrics`` is True,
            where ``metrics`` is whatever ``run_metrics`` returns.
    """
    # Suppress Hugging Face pipeline logging (note: this changes the
    # library-wide verbosity, not just this call).
    logging.set_verbosity(logging.CRITICAL)

    # Build the pipeline once, honouring the caller's sampling settings.
    # Previously a second pipeline was constructed right after this one with a
    # hard-coded max_length and no sampling arguments, which silently discarded
    # both `temp` and `max_length`.
    pipe = pipeline(
        task="text-generation",
        model=model,
        tokenizer=tokenizer,
        max_length=max_length,
        do_sample=True,
        temperature=temp,
    )

    # Generate text using the instruction-formatted prompt.
    result = pipe(f"<s>[INST] {prompt} [/INST]")
    generated_text = result[0]['generated_text']

    # Keep only what follows the first "[/INST]" marker. The marker is matched
    # without a trailing space because the prompt itself ends in "[/INST]" and
    # the completion may begin with a newline (or nothing) instead of a space;
    # surrounding whitespace is stripped afterwards either way.
    _, marker, completion = generated_text.partition("[/INST]")
    if marker:
        response = completion.strip()
    else:
        # Marker not present: fall back to the entire generated text.
        response = generated_text.strip()

    if show_metrics:
        # Calculate evaluation metrics for the response against the prompt.
        metrics = run_metrics(response, prompt, model_id)
        return response, metrics
    return response
if __name__ == '__main__':
    # Demo entry point: load the fine-tuned model from the Hugging Face Hub
    # and run a single sample prompt through the generation wrapper.
    huggingface_profile = "jenesys-ai"
    # `import src.config` binds only the top-level name `src`, so the config
    # module must be referenced through its full dotted path; a bare `config`
    # raises NameError.
    full_path = huggingface_profile + "/" + src.config.new_model
    # NOTE(review): `load_model` is not in scope here -- its import near the
    # top of the file is commented out. Restore/confirm the correct import
    # before running this script.
    model, tokenizer, peft_config = load_model(full_path)
    prompt = "Who were the children of the legendary Garth Greenhand, the High King of the First Men in the series A Song of Ice and Fire?"
    text_gen_eval_wrapper(model, tokenizer, prompt, show_metrics=False)