"""Blog generation with a 4-bit quantized Mistral-7B-Instruct model via LangChain."""

from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
from langchain_core.prompts import PromptTemplate
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline,
)

# NOTE: `accelerate` and `bitsandbytes` are not imported here, but both must
# be installed: transformers relies on them to honour the 4-bit config below.

# Load the weights in 4-bit precision to cut the model's memory footprint.
quants = BitsAndBytesConfig(load_in_4bit=True)
model_id = "mistralai/Mistral-7B-Instruct-v0.2"

# The tokenizer has no weights to quantize, so it takes no quantization config.
tokenizer = AutoTokenizer.from_pretrained(model_id)

# The keyword must be spelled `quantization_config`; a misspelled keyword is
# not recognised by the loader, so the 4-bit setting would never be applied.
model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=quants)

pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
hf = HuggingFacePipeline(pipeline=pipe)

# Prompt text is kept character-for-character as originally written (including
# its spelling), so model behaviour is not shifted by an incidental rewording.
_BLOG_TEMPLATE = (
    " You are an expert Blog generator , Given the Topic , the intended"
    " audience and the maximum number of words , Write a blog on the given"
    " topic Topic : {topic} Intended Audince : {role} Number of Words :"
    " {words} Strictly return the output in a markdown format"
)


def generate_blog(role: str, words: int, topic: str) -> str:
    """Generate a markdown blog post with the module-level LLM pipeline.

    Args:
        role: Intended audience of the blog; fills ``{role}`` in the prompt.
        words: Maximum number of words requested; fills ``{words}``.
        topic: Subject of the blog; fills ``{topic}``.

    Returns:
        The output of invoking the ``prompt | hf`` chain, i.e. the text
        produced by the text-generation pipeline for the filled-in prompt.
    """
    prompt = PromptTemplate.from_template(_BLOG_TEMPLATE)
    chain = prompt | hf
    return chain.invoke({"topic": topic, "words": words, "role": role})