from fastapi import FastAPI
from ctransformers import AutoModelForCausalLM
# Frontend-style sampling settings in the KoboldCpp/SillyTavern format. ctransformers
# never reads this dict directly; only a handful of these options map onto the keyword
# arguments that llm(...) accepts (see the sketch below).
params = {
    "temp": 0.3, "temperature_last": False, "top_p": 0.98, "top_k": 100, "top_a": 0, "tfs": 1, "epsilon_cutoff": 0, "eta_cutoff": 0,
    "typical_p": 1, "min_p": 0.05, "rep_pen": 1.1, "rep_pen_range": 2048, "no_repeat_ngram_size": 0, "penalty_alpha": 0, "num_beams": 1,
    "length_penalty": 1, "min_length": 0, "encoder_rep_pen": 1, "freq_pen": 0, "presence_pen": 0, "do_sample": True, "early_stopping": False,
    "add_bos_token": True, "truncation_length": 2048, "ban_eos_token": False, "skip_special_tokens": True, "streaming": True, "mirostat_mode": 0,
    "mirostat_tau": 5, "mirostat_eta": 0.1, "guidance_scale": 1, "negative_prompt": "", "grammar_string": "", "banned_tokens": "",
    "ignore_eos_token_aphrodite": False, "spaces_between_special_tokens_aphrodite": True, "type": "koboldcpp", "legacy_api": False,
    "sampler_order": [6, 0, 5, 3, 2, 1, 4], "n": 1, "rep_pen_size": 0, "genamt": 250, "max_length": 1024,
}
# Download and load the quantized GGUF model from the Hugging Face Hub via ctransformers.
llm = AutoModelForCausalLM.from_pretrained(
    "mradermacher/Silicon-Masha-7B-GGUF",
    model_type="llama",
    model_file="Silicon-Masha-7B.Q8_0.gguf",
)

app = FastAPI()
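# The params dict above is never consumed by ctransformers. As a sketch (an assumption,
# not part of the original code), a subset of those settings could be translated into the
# keyword arguments that llm(...) does accept:
gen_kwargs = {
    "temperature": params["temp"],
    "top_p": params["top_p"],
    "top_k": params["top_k"],
    "repetition_penalty": params["rep_pen"],
    "max_new_tokens": params["genamt"],
}
# A generation call using them would look like: llm(prompt, **gen_kwargs). The stream()
# function below is left as written and does not use this mapping.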
async def stream(message):
    # Character card fields; description, personality, and scenario are left empty here.
    char = "Lona Falldin"
    user = "Unknown adventurer"
    system = "Avoid repetition, don't loop. Develop the plot slowly, always stay in character. Describe all actions in full, elaborate, explicit, graphic, and vivid detail. Mention all relevant sensory perceptions."
    description = ""
    personality = ""
    scenario = ""

    # Alpaca-style instruction block describing the roleplay task and the character sheet.
    main_prompt = f"### Instruction:\nWrite {char}'s next reply in a fictional roleplay chat between {user} and {char}. Use the provided character sheet and example dialogue for formatting direction and character speech patterns.\n\n{system}\n\nDescription of {char}:\n{description}\n{char}'s personality: {personality}\n\nScenario: {scenario}\n\nPlay the role of {char}"

    prompt = f'''
Below is an instruction that describes a task. Write a response that appropriately completes the request.
{main_prompt}
### User reply: {message.strip()}
### Response:'''

    # Note: the ctransformers call is synchronous and blocking, and despite the function
    # name it returns the full completion rather than a token stream.
    generated_texts = llm(prompt)
    return generated_texts
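
# The file above creates `app` but registers no route, so the model is unreachable over
# HTTP. Below is a minimal sketch of one way to expose stream(); the "/generate" path and
# the GenerateRequest model are assumptions, not part of the original code.
from pydantic import BaseModel


class GenerateRequest(BaseModel):
    message: str


@app.post("/generate")
async def generate(request: GenerateRequest):
    # For true token streaming, llm(prompt, stream=True) together with FastAPI's
    # StreamingResponse would be needed; this sketch returns the full reply in one JSON body.
    reply = await stream(request.message)
    return {"reply": reply}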