---
base_model: unsloth/mistral-7b-bnb-4bit
library_name: peft
license: mit
datasets:
- yahma/alpaca-cleaned
language:
- en
pipeline_tag: text-generation
tags:
- physics
- conversational
---

How to use:

```python
!pip install peft accelerate bitsandbytes transformers

from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the configuration for the fine-tuned adapter
model_id = "Vijayendra/QST-Mistral-7b"
config = PeftConfig.from_pretrained(model_id)

# Load the base model and attach the fine-tuned adapter
base_model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path)
model = PeftModel.from_pretrained(base_model, model_id)

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

# Prepare the input for inference (Alpaca-style prompt template)
prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

instruction = "Explain the significance of cyclic operators in machine learning theory."
input_text = "Provide a detailed explanation suitable for a beginner in quantum machine learning."
formatted_prompt = prompt.format(instruction, input_text, "")

# Tokenize the input
inputs = tokenizer(
    formatted_prompt,
    return_tensors="pt",
    max_length=2048,
    truncation=True
).to("cuda")

# Run inference
model.to("cuda")
outputs = model.generate(
    **inputs,
    max_new_tokens=512,
    do_sample=True,
    temperature=0.7,
    top_k=50
)

# Decode and print the output
response = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(response)
```
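
If you only want the model's answer rather than the echoed prompt, one option (a minimal sketch, assuming the snippet above has already run and `inputs`, `outputs`, and `tokenizer` are still in scope) is to decode only the newly generated tokens:

```python
# Decode only the tokens generated after the prompt.
# Assumes `inputs`, `outputs`, and `tokenizer` from the snippet above.
prompt_length = inputs["input_ids"].shape[1]
generated_tokens = outputs[0][prompt_length:]
answer = tokenizer.decode(generated_tokens, skip_special_tokens=True)
print(answer)
```

This works because, for decoder-only models like Mistral, `generate` returns the prompt tokens followed by the newly generated tokens in a single sequence.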