MoxoffSrL
/

AzzurroQuantized

Text Generation

Inference Endpoints

Model card · Files and versions · Community

marcodambra committed on Apr 8, 2024

Commit

adc93ab

·

verified ·

1 Parent(s): fb2fbd7

Update README.md

Files changed (1) hide show

README.md +4 -8

README.md CHANGED Viewed

@@ -50,8 +50,10 @@ llm = Llama(
 )
 # Simple inference example
 output = llm(
-  "[INST] {prompt} [/INST]", # Prompt
   max_tokens=512,  # Generate up to 512 tokens
   stop=["[INST]"],   # Example stop token
   echo=True        # Whether to echo the prompt
@@ -62,13 +64,7 @@ output = llm(
 llm = Llama(model_path="/path/to/model.gguf", chat_format="mistral-instruct")
 response = llm.create_chat_completion(
     messages = [
-        {"role": "system", "content": "You are a story writing assistant."},
-        {
-            "role": "user",
-            "content": "Qual è il tuo piatto preferito??"
-        },
-        {"role": "assistant", "content": "Beh, ho un debole per una buona porzione di risotto allo zafferano. È un piatto che si distingue per il suo sapore ricco e il suo bellissimo colore dorato, rendendolo irresistibile!"},
-        {"role": "user", "content": "Hai delle ricette con il risotto che consigli?"},
     ]
 )

 )
 # Simple inference example
+prompt = "Sample prompt"
 output = llm(
+  f"[INST] {prompt} [/INST]", # Prompt
   max_tokens=512,  # Generate up to 512 tokens
   stop=["[INST]"],   # Example stop token
   echo=True        # Whether to echo the prompt
 llm = Llama(model_path="/path/to/model.gguf", chat_format="mistral-instruct")
 response = llm.create_chat_completion(
     messages = [
+        {"role": "user", "content": prompt},
     ]
 )