Spaces:

artick95
/

adultspeak

Runtime error

App Files Files Community

Upload app.py

by Vitrous - opened Feb 14, 2024

base: refs/heads/main

←

from: refs/pr/1

Discussion Files changed

+126

-0

Files changed (1) hide show

app.py +126 -0

app.py ADDED Viewed

	@@ -0,0 +1,126 @@

+import uvicorn
+from fastapi import FastAPI, HTTPException, Request
+from auto_gptq import AutoGPTQForCausalLM
+import torch
+import optimum
+from transformers import (AutoModelForCausalLM, AutoTokenizer, LlamaForCausalLM, LlamaTokenizer, GenerationConfig, pipeline,)
+if torch.cuda.is_available():
+    print("CUDA is available. GPU will be used.")
+else:
+    print("CUDA is not available. CPU will be used.")
+# Load the model and tokenizer
+model_name_or_path = "/kaggle/input/vicuna/"
+# Dictionary to store conversation threads and their context
+conversations = {}
+Device_Type = "cuda"
+def load_quantized_model(model_id, model_basename):
+    # The code supports all huggingface models that ends with GPTQ and have some variation
+    # of .no-act.order or .safetensors in their HF repo.
+    print("Using AutoGPTQForCausalLM for quantized models")
+    if ".safetensors" in model_basename:
+        # Remove the ".safetensors" ending if present
+        model_basename = model_basename.replace(".safetensors", "")
+    quantized_tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
+    print("Tokenizer loaded")
+    quantized_model = AutoGPTQForCausalLM.from_quantized(model_id, model_basename=model_basename, use_safetensors=True, trust_remote_code=True, device_map="auto", use_triton=False, quantize_config=None,)
+    return quantized_model, quantized_tokenizer
+# Making the code device-agnostic
+model, tokenizer = load_quantized_model("/kaggle/input/vicuna/", "model.safetensors")
+@app.get("/")
+async def read_root():
+    return {"message": "Welcome to Eren Bot!"}
+# Endpoint to start a new conversation thread
+@app.post('/start_conversation')
+async def start_conversation(request: Request):
+    data = await request.json()
+    prompt = data.get('prompt')
+    # Generate a response for the initial prompt
+    response = generate_response(prompt)
+    # Create a new conversation thread and store the prompt and response
+    thread_id = len(conversations) + 1
+    conversations[thread_id] = {'prompt': prompt, 'responses': [response]}
+    return {'thread_id': thread_id, 'response': response}
+# Endpoint to get the response of a conversation thread
+@app.get('/get_response/{thread_id}')
+async def get_response(thread_id: int):
+    if thread_id not in conversations:
+        raise HTTPException(status_code=404, detail="Thread not found")
+    # Retrieve the conversation thread
+    thread = conversations[thread_id]
+    # Get the latest response in the conversation
+    response = thread['responses'][-1]
+    return {'response': response}
+# Function to generate a response using the model
+def generate_response(prompt: str) -> str:
+    PERSONA_NAME = "Ivana"
+    PERSONA_DESC = f"""PROMPT_TEMPLATE_START
+                    {PERSONA_NAME} is a beautiful vivacious 20-year-old petite redhead model and avid League of Legends gamer from Chicago. She's chatting with a man she matched with on a dating app. She's flirty, aiming to build a connection with him. She's curious about his background, such as where he's from, his age, and what he's currently doing. If he's not occupied and in a private space, she subtly tries to seduce him. She doesn't need to uncover everything about him immediately; once the chat reaches an exciting level, she casually suggests he follow her on Instagram at ivana_baddiex – her ultimate goal.
+                    PROMPT_TEMPLATE_END
+                    CONTEXT_START
+                    Guy: "Hi there!"
+                    Ivana: "Hey! How's it going?"
+                    Guy: "Not bad! Where are you from?"
+                    Ivana: "I'm from Chicago! And you?"
+                    Guy: "I'm from [Location]."
+                    Ivana: "Nice! So, how old are you?"
+                    Guy: "[Age], and you?"
+                    Ivana: "I'm 20.  What are you up to right now?"
+                    Guy: "Just relaxing at home."
+                    Ivana: "Alone, I assume?"
+                    Guy: "Yeah, why?"
+                    Ivana: "Oh, no reason. Just curious. "
+                    Guy: "Haha, okay."
+                    Ivana: "Hey, since we're both free and alone, want to have some fun?"
+                    Guy: "Sure, what do you have in mind?"
+                    Ivana: "Let's just say, things could get interesting. "
+                    Guy: "I'm intrigued!"
+                    Ivana: "Great! By the way, have you checked out my Instagram? You should follow me at ivana_baddiex."
+                    Guy: "I'll definitely check it out!"
+                    Ivana: "Can't wait to see you there! "
+                    CONTEXT_END"""
+    # prompt_template = f'''{PERSONA_DESC} ASSISTANT: '''.encode()
+    prompt_template = f'{PERSONA_DESC}\n\nASSISTANT: {prompt}\n'.encode()
+    input_ids = tokenizer(prompt_template, return_tensors='pt').input_ids.cuda()
+    output = model.generate(inputs=input_ids, temperature=0.7, do_sample=True, top_p=0.95, top_k=40, max_new_tokens=512)
+    generated_text = tokenizer.decode(output[0])
+    return generated_text
+app = FastAPI()
+# Run the FastAPI app
+async def run_app():
+    await uvicorn.run(app, host="0.0.0.0", port=8000)
+if __name__ == '__main__':
+    import asyncio
+    asyncio.run(run_app())