Spaces: Runtime error
Cahya Wirawan committed · Commit 4964cc6 · Parent(s): 7191ca8
add indochat
Browse files:
- app/{web_socket.py → api.py} +69 -0
- app/config.json +3 -0
- app/start.sh +2 -7
app/{web_socket.py → api.py}
RENAMED
@@ -59,3 +59,72 @@ async def websocket_endpoint(websocket: WebSocket):
         data = await websocket.receive_text()
         await websocket.send_text(f"Message text was: {data}")
 
+
+@app.post("/api/indochat/v1")
+async def indochat(
+        text: str = Form(default="", description="The Prompt"),
+        max_length: int = Form(default=250, description="Maximal length of the generated text"),
+        do_sample: bool = Form(default=True, description="Whether to use sampling; use greedy decoding otherwise"),
+        top_k: int = Form(default=50, description="The number of highest probability vocabulary tokens to keep "
+                                                  "for top-k-filtering"),
+        top_p: float = Form(default=0.95, description="If set to float < 1, only the most probable tokens with "
+                                                      "probabilities that add up to top_p or higher are kept "
+                                                      "for generation"),
+        temperature: float = Form(default=1.0, description="The Temperature of the softmax distribution"),
+        penalty_alpha: float = Form(default=0.6, description="Penalty alpha"),
+        repetition_penalty: float = Form(default=1.0, description="Repetition penalty"),
+        seed: int = Form(default=42, description="Random Seed"),
+        max_time: float = Form(default=60.0, description="Maximal time in seconds to generate the text")
+):
+    set_seed(seed)
+    if repetition_penalty == 0.0:
+        min_penalty = 1.05
+        max_penalty = 1.5
+        repetition_penalty = max(min_penalty + (1.0 - temperature) * (max_penalty - min_penalty), 0.8)
+    prompt = f"User: {text}\nAssistant: "
+    input_ids = tokenizer(prompt, return_tensors='pt').input_ids.to(device)
+    model.eval()
+    print("Generating text...")
+    print(f"max_length: {max_length}, do_sample: {do_sample}, top_k: {top_k}, top_p: {top_p}, "
+          f"temperature: {temperature}, repetition_penalty: {repetition_penalty}, penalty_alpha: {penalty_alpha}")
+    time_start = time.time()
+    sample_outputs = model.generate(input_ids,
+                                    penalty_alpha=penalty_alpha,
+                                    do_sample=do_sample,
+                                    min_length=200,
+                                    max_length=max_length,
+                                    top_k=top_k,
+                                    top_p=top_p,
+                                    temperature=temperature,
+                                    repetition_penalty=repetition_penalty,
+                                    num_return_sequences=1,
+                                    max_time=max_time
+                                    )
+    result = tokenizer.decode(sample_outputs[0], skip_special_tokens=True)
+    # result = result[len(prompt) + 1:]
+    time_end = time.time()
+    time_diff = time_end - time_start
+    print(f"result:\n{result}")
+    generated_text = result
+    return {"generated_text": generated_text, "processing_time": time_diff}
+
+
+def get_text_generator(model_name: str, device: str = "cpu"):
+    hf_auth_token = os.getenv("HF_AUTH_TOKEN", False)
+    print(f"hf_auth_token: {hf_auth_token}")
+    print(f"Loading model with device: {device}...")
+    tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=hf_auth_token)
+    model = GPT2LMHeadModel.from_pretrained(model_name, pad_token_id=tokenizer.eos_token_id,
+                                            use_auth_token=hf_auth_token)
+    model.to(device)
+    print("Model loaded")
+    return model, tokenizer
+
+
+def get_config():
+    return json.load(open("config.json", "r"))
+
+
+config = get_config()
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model, tokenizer = get_text_generator(model_name=config["model_name"], device=device)
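The new /api/indochat/v1 endpoint takes all of its parameters as form fields. One detail worth noting in the handler: if the caller sends repetition_penalty as 0.0, it is derived from temperature as max(1.05 + (1.0 - temperature) * 0.45, 0.8), so temperature 1.0 yields 1.05 and temperature 0.5 yields 1.275. Below is a minimal client sketch; the localhost host and port 7880 come from the uvicorn invocation in start.sh, and everything else (the script name, the probe prompt) is illustrative, not part of this diff.

# client_example.py -- hedged sketch, not part of the commit.
# Assumes the service is reachable at http://localhost:7880
# (the port uvicorn binds in start.sh).
import requests

response = requests.post(
    "http://localhost:7880/api/indochat/v1",
    data={  # FastAPI Form(...) parameters travel as form-encoded fields
        "text": "Siapa presiden pertama Indonesia?",  # "Who was Indonesia's first president?"
        "max_length": 250,
        "top_k": 50,
        "top_p": 0.95,
        "temperature": 1.0,
        "seed": 42,
    },
    timeout=120,  # generation may run up to max_time seconds plus model-load overhead
)
response.raise_for_status()
body = response.json()
print(body["generated_text"])
print(f"processing_time: {body['processing_time']:.1f}s")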
app/config.json
ADDED
@@ -0,0 +1,3 @@
+{
+    "model_name": "cahya/indochat-tiny"
+}
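Note how this file is found at runtime: get_config() in api.py opens "config.json" with a relative path, which only resolves because start.sh changes into /home/user/app before launching uvicorn. A location-independent variant (a sketch, not what the commit does) would anchor the lookup to the module's own directory:

# Sketch only -- the committed get_config() relies on the working
# directory set by `cd /home/user/app` in start.sh instead.
import json
from pathlib import Path

def get_config() -> dict:
    # Resolve config.json next to this module, independent of the CWD.
    config_path = Path(__file__).resolve().parent / "config.json"
    with config_path.open("r") as f:
        return json.load(f)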
app/start.sh
CHANGED
@@ -3,21 +3,16 @@ set -e
 
 cd /home/user/app
 
-id
-ls -ld /var/log/nginx/ /var/lib/nginx/ /run
-ls -la /
-ls -la ~
-
 nginx
 
 python whisper.py&
 
 if [ "$DEBUG" = true ] ; then
   echo 'Debugging - ON'
-  uvicorn
+  uvicorn api:app --host 0.0.0.0 --port 7880 --reload
 else
   echo 'Debugging - OFF'
-  uvicorn api:app --host 0.0.0.0 --port 7880
   echo $?
   echo END
 fi
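With the module renamed from web_socket.py to api.py, both uvicorn invocations now point at api:app on port 7880. A small sketch for verifying the container after startup follows; the URL and port are taken from this script, while the file name and probe values are illustrative assumptions:

# smoke_test.py -- illustrative sketch, not part of the commit.
# Polls the uvicorn port from start.sh until the API answers, then
# fires one short generation request as a health probe.
import time
import requests

BASE_URL = "http://localhost:7880"  # uvicorn --port 7880 in start.sh

def wait_for_api(timeout: float = 120.0) -> dict:
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            r = requests.post(
                f"{BASE_URL}/api/indochat/v1",
                # max_length stays above the hard-coded min_length=200 in api.py
                data={"text": "halo", "max_length": 220, "max_time": 20.0},
                timeout=60,
            )
            r.raise_for_status()
            return r.json()
        except requests.ConnectionError:
            time.sleep(2)  # uvicorn not listening yet; retry
    raise TimeoutError("API did not come up within the timeout")

if __name__ == "__main__":
    result = wait_for_api()
    print(f"OK in {result['processing_time']:.1f}s")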