phanerozoic committed
Commit 785e1a7 · verified · 1 Parent(s): b12d444

Update app.py

Files changed (1): app.py (+52, -91)
app.py CHANGED
@@ -1,15 +1,11 @@
-# app.py • SchoolSpirit AI chatbot Space
-# Granite‑3.3‑2B‑Instruct | Streaming + rate‑limit + hallucination guard
 import os, re, time, datetime, threading, traceback, torch, gradio as gr
-from transformers import (AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer)
+from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
 from transformers.utils import logging as hf_logging

-# ───────────────────────────────── Log helper ────────────────────────────────
 os.environ["HF_HOME"] = "/data/.huggingface"
 LOG_FILE = "/data/requests.log"
-def log(msg: str):
-    ts = datetime.datetime.utcnow().strftime("%H:%M:%S.%f")[:-3]
-    line = f"[{ts}] {msg}"
+def log(m):
+    line = f"[{datetime.datetime.utcnow().strftime('%H:%M:%S.%f')[:-3]}] {m}"
     print(line, flush=True)
     try:
         with open(LOG_FILE, "a") as f:
@@ -17,13 +13,9 @@ def log(msg: str):
     except FileNotFoundError:
         pass

-# ─────────────────────────────── Configuration ───────────────────────────────
-MODEL_ID = "ibm-granite/granite-3.3-2b-instruct"
-CTX_TOKENS = 1800
-MAX_NEW_TOKENS = 120
-TEMP = 0.6
-MAX_INPUT_CH = 300
-RATE_N, RATE_SEC = 5, 60  # 5 msgs / 60 s per IP
+MODEL_ID = "ibm-granite/granite-3.3-2b-instruct"
+CTX_TOK, MAX_NEW, TEMP = 1800, 64, 0.6
+MAX_IN, RATE_N, RATE_T = 300, 5, 60

 SYSTEM_MSG = (
     "You are **SchoolSpirit AI**, the friendly digital mascot of "
@@ -37,121 +29,90 @@ SYSTEM_MSG = (
     "• If you can’t answer, politely direct the user to [email protected].\n"
     "• Keep language age‑appropriate; avoid profanity, politics, mature themes."
 )
-WELCOME = "Hi there! I’m SchoolSpirit AI. Ask me anything about our services!"
+WELCOME = "Hi there! I’m SchoolSpirit AI. How can I help?"

 strip = lambda s: re.sub(r"\s+", " ", s.strip())

-# ─────────────────────── Load tokenizer & model ──────────────────────────────
 hf_logging.set_verbosity_error()
 try:
-    log("Loading tokenizer …")
     tok = AutoTokenizer.from_pretrained(MODEL_ID)
-
-    if torch.cuda.is_available():
-        log("GPU detected → loading model in FP‑16")
-        model = AutoModelForCausalLM.from_pretrained(
-            MODEL_ID,
-            device_map="auto",
-            torch_dtype=torch.float16,
-        )
-    else:
-        log("No GPU → loading model on CPU (this is slower)")
-        model = AutoModelForCausalLM.from_pretrained(
-            MODEL_ID,
-            device_map="cpu",
-            torch_dtype="auto",
-            low_cpu_mem_usage=True,
-        )
-
+    model = AutoModelForCausalLM.from_pretrained(
+        MODEL_ID,
+        device_map="auto" if torch.cuda.is_available() else "cpu",
+        torch_dtype=torch.float16 if torch.cuda.is_available() else "auto",
+        low_cpu_mem_usage=True,
+    )
     MODEL_ERR = None
-    log("Model loaded ✔")
-except Exception as exc:
-    MODEL_ERR = f"Model load error: {exc}"
-    log("❌ " + MODEL_ERR + "\n" + traceback.format_exc())
+    log("Model loaded")
+except Exception as e:
+    MODEL_ERR = f"Model load error: {e}"
+    log(MODEL_ERR + "\n" + traceback.format_exc())

-# ────────────────────────── Per‑IP rate limiter ──────────────────────────────
-VISITS: dict[str, list[float]] = {}
-def allowed(ip: str) -> bool:
+VISITS = {}
+def allowed(ip):
     now = time.time()
-    VISITS[ip] = [t for t in VISITS.get(ip, []) if now - t < RATE_SEC]
+    VISITS[ip] = [t for t in VISITS.get(ip, []) if now - t < RATE_T]
     if len(VISITS[ip]) >= RATE_N:
         return False
     VISITS[ip].append(now)
     return True

-# ─────────────────────── Prompt builder (token budget) ───────────────────────
-def build_prompt(raw: list[dict]) -> str:
+def build_prompt(raw):
     def render(m):
         if m["role"] == "system":
             return m["content"]
-        prefix = "User:" if m["role"] == "user" else "AI:"
-        return f"{prefix} {m['content']}"
-    system, convo = raw[0], raw[1:]
+        return f"{'User:' if m['role']=='user' else 'AI:'} {m['content']}"
+    sys, convo = raw[0], raw[1:]
     while True:
-        parts = [system["content"]] + [render(m) for m in convo] + ["AI:"]
-        if len(tok.encode("\n".join(parts), add_special_tokens=False)) <= CTX_TOKENS or len(convo) <= 2:
+        parts = [sys["content"]] + [render(m) for m in convo] + ["AI:"]
+        if len(tok.encode("\n".join(parts), add_special_tokens=False)) <= CTX_TOK or len(convo) <= 2:
             return "\n".join(parts)
-        convo = convo[2:]  # drop oldest user+assistant pair
+        convo = convo[2:]

-# ───────────────────────── Streaming chat callback ───────────────────────────
-def chat_fn(user_msg, chat_hist, state, request: gr.Request):
+def chat_fn(user_msg, hist, state, request: gr.Request):
     ip = request.client.host if request else "anon"
     if not allowed(ip):
-        chat_hist.append((user_msg, "Rate limit exceeded — please wait a minute."))
-        return chat_hist, state
-
+        hist.append((user_msg, "Rate limit exceeded — please wait a minute."))
+        return hist, state, ""
     user_msg = strip(user_msg or "")
     if not user_msg:
-        return chat_hist, state
-    if len(user_msg) > MAX_INPUT_CH:
-        chat_hist.append((user_msg, f"Input >{MAX_INPUT_CH} chars."))
-        return chat_hist, state
+        return hist, state, ""
+    if len(user_msg) > MAX_IN:
+        hist.append((user_msg, f"Input >{MAX_IN} chars."))
+        return hist, state, ""
     if MODEL_ERR:
-        chat_hist.append((user_msg, MODEL_ERR))
-        return chat_hist, state
+        hist.append((user_msg, MODEL_ERR))
+        return hist, state, ""

-    # append user turn & empty assistant slot
-    chat_hist.append((user_msg, ""))
+    hist.append((user_msg, ""))
     state["raw"].append({"role": "user", "content": user_msg})

     prompt = build_prompt(state["raw"])
-    input_ids = tok(prompt, return_tensors="pt").to(model.device).input_ids
-
+    ids = tok(prompt, return_tensors="pt").to(model.device).input_ids
     streamer = TextIteratorStreamer(tok, skip_prompt=True, skip_special_tokens=True)
     threading.Thread(
         target=model.generate,
-        kwargs=dict(
-            input_ids=input_ids,
-            max_new_tokens=MAX_NEW_TOKENS,
-            temperature=TEMP,
-            streamer=streamer,
-        ),
+        kwargs=dict(input_ids=ids, max_new_tokens=MAX_NEW, temperature=TEMP, streamer=streamer),
     ).start()

     partial = ""
-    try:
-        for token in streamer:
-            partial += token
-            # hallucination guard: stop if model starts new speaker tag
-            if "User:" in partial or "\nAI:" in partial:
-                partial = re.split(r"(?:\n?User:|\n?AI:)", partial)[0].strip()
-                break
-            chat_hist[-1] = (user_msg, partial)
-            yield chat_hist, state
-    except Exception as exc:
-        log("❌ Stream error:\n" + traceback.format_exc())
-        partial = "Apologies—internal error. Please try again."
+    for piece in streamer:
+        partial += piece
+        if "User:" in partial or "\nAI:" in partial:
+            partial = re.split(r"(?:\n?User:|\n?AI:)", partial)[0].strip()
+            break
+        hist[-1] = (user_msg, partial)
+        yield hist, state, ""

     reply = strip(partial)
-    chat_hist[-1] = (user_msg, reply)
+    hist[-1] = (user_msg, reply)
     state["raw"].append({"role": "assistant", "content": reply})
-    yield chat_hist, state  # final
+    yield hist, state, ""

-# ─────────────────────────── Gradio Blocks UI ────────────────────────────────
 with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
     gr.Markdown("### SchoolSpirit AI Chat")
-    bot = gr.Chatbot(value=[("", WELCOME)], height=480, label="SchoolSpirit AI")
-    st = gr.State({
+    bot = gr.Chatbot(value=[("", WELCOME)], height=480)
+    st = gr.State({
         "raw": [
             {"role": "system", "content": SYSTEM_MSG},
             {"role": "assistant", "content": WELCOME},
@@ -159,8 +120,8 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
     })
     with gr.Row():
         txt = gr.Textbox(placeholder="Type your question here…", show_label=False, lines=1, scale=4)
-        btn = gr.Button("Send", variant="primary")
-        btn.click(chat_fn, inputs=[txt, bot, st], outputs=[bot, st])
-        txt.submit(chat_fn, inputs=[txt, bot, st], outputs=[bot, st])
+        send = gr.Button("Send", variant="primary")
+        send.click(chat_fn, inputs=[txt, bot, st], outputs=[bot, st, txt])
+        txt.submit(chat_fn, inputs=[txt, bot, st], outputs=[bot, st, txt])

 demo.launch()
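
The core of the updated app.py is the background-generation streaming loop in chat_fn. Below is a minimal, standalone sketch of that pattern, using the same Granite checkpoint; the prompt text, token budget, and variable names here are illustrative and not taken from the commit:

# Minimal sketch of the streaming pattern used in chat_fn above.
# Assumptions: the checkpoint can be downloaded, and the prompt/stop
# values below are placeholders chosen for illustration.
import threading
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer

MODEL_ID = "ibm-granite/granite-3.3-2b-instruct"
tok = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, device_map="auto")

prompt = "You are a friendly school mascot.\nUser: What can you help with?\nAI:"
ids = tok(prompt, return_tensors="pt").to(model.device).input_ids

# generate() runs in a worker thread; the streamer yields decoded text
# chunks on the main thread as soon as they are produced.
streamer = TextIteratorStreamer(tok, skip_prompt=True, skip_special_tokens=True)
threading.Thread(
    target=model.generate,
    kwargs=dict(input_ids=ids, max_new_tokens=64, streamer=streamer),
).start()

partial = ""
for piece in streamer:
    partial += piece
    # same idea as the app's speaker-tag guard: stop once the model
    # starts inventing the next "User:" turn
    if "User:" in partial:
        partial = partial.split("User:")[0].strip()
        break
print(partial)

In the Space itself, the same loop assigns the growing partial string to the last Chatbot entry and yields it, which is what produces the token-by-token display in the Gradio UI.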