# llm-tg-chatbot/app.py
from fastapi import FastAPI
from ctransformers import AutoModelForCausalLM
# Generation settings in the form ctransformers actually accepts. The old
# KoboldCpp-style sampler dict (sampler_order, mirostat_*, tfs, etc.) was never
# passed to the model and is not understood by ctransformers, so only the
# supported options are kept, mapped from the previous values.
params = {
    "temperature": 0.3,         # was "temp"
    "top_p": 0.98,
    "top_k": 100,
    "repetition_penalty": 1.1,  # was "rep_pen"
    "last_n_tokens": 2048,      # was "rep_pen_range"
    "max_new_tokens": 250,      # was "genamt"
    "stop": ["###"],            # assumption: cut off before the model writes the next "###" turn marker
}
llm = AutoModelForCausalLM.from_pretrained("mradermacher/Silicon-Masha-7B-GGUF", model_type="llama", model_file="Silicon-Masha-7B.Q8_0.gguf")
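# Note: this loads on CPU by default. ctransformers' from_pretrained also takes a
# `gpu_layers` argument to offload layers to the GPU, which requires a CUDA-enabled
# build (e.g. `pip install ctransformers[cuda]`).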
app = FastAPI()
@app.post("/send")
async def send(message: str):
    char = "Lona Falldin"
    user = "Unknown adventurer"
    system = (
        "Avoid repetition, don't loop. Develop the plot slowly, always stay in character. "
        "Describe all actions in full, elaborate, explicit, graphic, and vivid detail. "
        "Mention all relevant sensory perceptions."
    )
    # Character sheet fields are intentionally left blank for now.
    description = ""
    personality = ""
    scenario = ""
    main_prompt = (
        f"### Instruction:\nWrite {char}'s next reply in a fictional roleplay chat "
        f"between {user} and {char}. Use the provided character sheet and example "
        f"dialogue for formatting direction and character speech patterns.\n\n{system}\n\n"
        f"Description of {char}:\n{description}\n{char}'s personality: {personality}\n\n"
        f"Scenario: {scenario}\n\nPlay the role of {char}"
    )
    prompt = f'''
Below is an instruction that describes a task. Write a response that appropriately completes the request.
{main_prompt}
### User reply: {message.strip()}
### Response:'''
    # Generate with the sampler settings defined above; ctransformers returns a plain string.
    generated_text = llm(prompt, **params)
    return {"response": generated_text}
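
# Minimal local runner, a sketch assuming uvicorn is installed; host and port are
# arbitrary choices, not part of the original app.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="127.0.0.1", port=8000)

# Example request once the server is up (message arrives as a query parameter,
# since the endpoint declares a plain `str` argument):
#   curl -X POST "http://127.0.0.1:8000/send?message=Hello%20there"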