|
""" |
|
Orify Text Detector – full-width UI + explicit verdict (Zero-GPU ready)

• Three ModernBERT-base checkpoints (soft-vote)
• Per-line highlights, hover tool-tips, and a big verdict banner
• Weights auto-downloaded & cached
|
""" |
|
|
|
|
|
from pathlib import Path |
|
import os, re, html, typing |
|
import torch, gradio as gr |
|
from transformers import AutoTokenizer, AutoModelForSequenceClassification |
|
from huggingface_hub import hf_hub_download |
|
import spaces |
|
|
|
|
|
# Replace torch.compile with a pass-through so that any later call to it in
# this process hands back the object it was given instead of compiling it.
if hasattr(torch, "compile"):

    def _no_compile(model: typing.Any = None, *args, **kwargs):
        """Stand-in for ``torch.compile`` that never compiles anything.

        Called with a callable first argument, it returns that callable
        unchanged. Called any other way (for example with only keyword
        options), it returns an identity decorator. Extra positional and
        keyword arguments are accepted and ignored.
        """
        return model if callable(model) else (lambda fn: fn)

    torch.compile = _no_compile

# Exported to the process environment as well.
# NOTE(review): presumably consulted by TorchInductor - confirm the variable
# name against the installed torch version.
os.environ["TORCHINDUCTOR_DISABLED"] = "1"
|
|
|
|
|
# Use CUDA when torch reports it as available, otherwise fall back to the CPU.
DEVICE = (
    torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
)

# Hub repository the checkpoint files are downloaded from.
WEIGHT_REPO = "Sleepyriizi/Orify-Text-Detection-Weights"

# alias -> file name inside WEIGHT_REPO; every alias equals its file name.
# The "ensamble" spelling is the name this file requests - do not "correct" it.
FILE_MAP = {
    name: name for name in ("ensamble_1", "ensamble_2.bin", "ensamble_3")
}

# Backbone used for both the tokenizer and the classifier architecture.
BASE_MODEL = "answerdotai/ModernBERT-base"

# Number of output classes requested for the classifier; equals len(LABELS).
NUM_LABELS = 41

# Class index -> label name. Index 24 is the single "human" class; every
# other entry names a text generator.
LABELS = dict(
    enumerate(
        (
            "13B", "30B", "65B", "7B", "GLM130B", "bloom_7b", "bloomz",
            "cohere", "davinci", "dolly", "dolly-v2-12b", "flan_t5_base",
            "flan_t5_large", "flan_t5_small", "flan_t5_xl", "flan_t5_xxl",
            "gemma-7b-it", "gemma2-9b-it", "gpt-3.5-turbo", "gpt-35",
            "gpt-4", "gpt-4o", "gpt-j", "gpt-neox", "human", "llama3-70b",
            "llama3-8b", "mixtral-8x7b", "opt-1.3b", "opt-125m", "opt-13b",
            "opt-2.7b", "opt-30b", "opt-350m", "opt-6.7b", "opt-iml-30b",
            "opt-iml-max-1.3b", "t0-11b", "t0-3b", "text-davinci-002",
            "text-davinci-003",
        )
    )
)
|
|
|
|
|
# Stylesheet passed to gr.Blocks(css=CSS).
#   --ai / --human            colour variables (red / green)
#   .output-box               class given to the gr.HTML result component
#   .ai-line / .human-line    tinted backgrounds for the spans built in analyse()
#   .prob-tooltip             help cursor + dotted underline on those spans
CSS = """
:root{--ai:#ff4d4f;--human:#52c41a;--border:2px solid var(--ai);--radius:10px}
body{font-family:'Roboto Mono',monospace;margin:0;padding:32px;box-sizing:border-box}
input,textarea,.output-box{width:100%;box-sizing:border-box}
textarea{padding:16px;font-size:1rem;border:var(--border);border-radius:var(--radius)}
.output-box{min-height:200px;border:var(--border);border-radius:var(--radius);padding:16px}
.ai-line{background:rgba(255,77,79,.12);padding:2px 4px;border-radius:4px}
.human-line{background:rgba(82,196,26,.12);padding:2px 4px;border-radius:4px}
.prob-tooltip{cursor:help;border-bottom:1px dotted currentColor}
"""
|
|
|
|
|
# --- Fetch the checkpoint files ---------------------------------------------
# `resume_download` is intentionally not passed: huggingface_hub deprecated the
# argument (interrupted downloads are resumed automatically) and passing it
# only produces a warning on current releases.
print("π Downloading weights …")
local_paths = {
    alias: hf_hub_download(WEIGHT_REPO, filename)
    for alias, filename in FILE_MAP.items()
}

# --- Build the ensemble -----------------------------------------------------
print("🧩 Initialising models …")
tok = AutoTokenizer.from_pretrained(BASE_MODEL)

# One classifier per checkpoint: instantiate the base architecture with a
# NUM_LABELS-way head, then overwrite its parameters with the downloaded
# state dict.
models = []
for checkpoint_path in local_paths.values():
    classifier = AutoModelForSequenceClassification.from_pretrained(
        BASE_MODEL,
        num_labels=NUM_LABELS,
    )
    # NOTE(security): torch.load unpickles the file. On torch versions where
    # `weights_only` defaults to False this can execute code embedded in a
    # tampered checkpoint. Consider passing weights_only=True once it is
    # confirmed that all three files are plain tensor state dicts.
    state_dict = torch.load(checkpoint_path, map_location=DEVICE)
    classifier.load_state_dict(state_dict)
    classifier.to(DEVICE).eval()  # inference only
    models.append(classifier)
|
|
|
|
|
def tidy(txt: str) -> str:
    """Normalise pasted text into paragraphs separated by one blank line.

    Steps, applied in this order:

    1. Convert ``\\r\\n`` and bare ``\\r`` line endings to ``\\n``.
    2. Collapse any run of blank / whitespace-only lines to a single blank
       line (``\\n\\n``).
    3. Squeeze runs of spaces and tabs to one space.
    4. Re-join words hyphenated across a line break (``co-\\noperate`` ->
       ``cooperate``).
    5. Turn each remaining single line break into a space, so only the
       blank-line paragraph breaks survive.

    The result is stripped of leading and trailing whitespace.
    """
    cleaned = txt.replace("\r\n", "\n").replace("\r", "\n")

    # (pattern, replacement) pairs; the order matters, see the docstring.
    rewrites = (
        (r"\n\s*\n+", "\n\n"),
        (r"[ \t]+", " "),
        (r"(\w+)-\n(\w+)", r"\1\2"),
        (r"(?<!\n)\n(?!\n)", " "),
    )
    for pattern, replacement in rewrites:
        cleaned = re.sub(pattern, replacement, cleaned)

    return cleaned.strip()
|
|
|
def infer(seg: str):
    """Score one text segment with the soft-voting ensemble.

    The segment is tokenised (truncation enabled) and passed through every
    model in ``models``; the per-model softmax distributions are averaged.

    Returns a ``(human, ai, top3)`` tuple:

    * ``ai``    - sum of the averaged probabilities of every class except
                  "human", as a percentage.
    * ``human`` - ``100 - ai``.
    * ``top3``  - label names of the three most probable non-human classes,
                  most probable first.
    """
    human_idx = 24  # position of "human" in LABELS

    encoded = tok(
        seg,
        return_tensors="pt",
        truncation=True,
        padding=True,
    ).to(DEVICE)

    with torch.no_grad():
        per_model = [
            torch.softmax(model(**encoded).logits, dim=1) for model in models
        ]
        # Average over the models, then take the single row of the batch.
        probs = torch.stack(per_model).mean(0)[0]

    # Zero the human class so both the sum and the top-k ignore it.
    ai_probs = probs.clone()
    ai_probs[human_idx] = 0

    ai = ai_probs.sum().item() * 100
    human = 100 - ai

    best = torch.topk(ai_probs, 3).indices.tolist()
    top3 = [LABELS[i] for i in best]
    return human, ai, top3
|
|
|
|
|
@spaces.GPU
def analyse(txt: str):
    """Classify *txt* and render the result as an HTML fragment.

    The input is normalised with ``tidy`` and split on ``"\\n"``. Every
    non-blank line is scored with ``infer`` and wrapped in a ``<span>``
    whose class marks it as AI or human and whose ``title`` carries the
    score (plus the top-3 model labels for AI lines). Blank lines become
    ``<br>``.

    The banner at the top reports the unweighted mean of the per-line
    scores; ties go to "Human-written".

    Returns:
        An HTML string. When *txt* is ``None``, empty, or whitespace only,
        a short prompt asking for input is returned instead.
    """
    prompt = "✏️ Please paste or type some text to analyse…"
    # `not txt` also covers None, which would otherwise crash on .strip().
    if not txt or not txt.strip():
        return prompt

    out = []
    h_sum, ai_sum, n = 0.0, 0.0, 0
    for ln in tidy(txt).split("\n"):
        if not ln.strip():
            out.append("<br>")
            continue

        n += 1
        h, ai, top3 = infer(ln)
        h_sum += h
        ai_sum += ai

        if ai > h:
            cls = "ai-line"
            tip = f"AI {ai:.2f}% • Top-3: {', '.join(top3)}"
        else:
            cls = "human-line"
            tip = f"Human {h:.2f}%"

        # Both the tooltip and the line are escaped: the tooltip sits inside
        # a single-quoted attribute, the line is user-supplied text.
        out.append(
            f"<span class='{cls} prob-tooltip' "
            f"title='{html.escape(tip, quote=True)}'>"
            f"{html.escape(ln)}</span>"
        )

    # Defensive: never divide by zero if no line was scored.
    if n == 0:
        return prompt

    human_avg, ai_avg = h_sum / n, ai_sum / n
    if human_avg >= ai_avg:
        verdict = (
            "<span class='human-line' style='padding:6px 10px;font-weight:bold'>"
            f"Human-written {human_avg:.2f}%</span>"
        )
    else:
        verdict = (
            "<span class='ai-line' style='padding:6px 10px;font-weight:bold'>"
            f"AI-generated {ai_avg:.2f}%</span>"
        )
    return f"<h3>{verdict}</h3><hr>" + "<br>".join(out)
|
|
|
|
|
# --- User interface ---------------------------------------------------------
# A single column: heading, usage legend, input box, button, HTML result area
# and a footer. Components are created inside the Blocks context so they are
# attached to `demo`.
with gr.Blocks(css=CSS, title="Orify Text Detector") as demo:
    gr.Markdown("## Orify Text Detector")
    gr.Markdown(
        "Paste text, click **Analyse**.<br>"
        "<span class='human-line'>Green</span>=human | "
        "<span class='ai-line'>Red</span>=AI.<br>"
        "Hover a line to see confidence & top-3 AI models."
    )
    inp = gr.Textbox(lines=8, placeholder="Paste text here …")
    btn = gr.Button("Analyse")
    # The "output-box" class is styled in CSS.
    out = gr.HTML(elem_classes=["output-box"])
    # Clicking the button sends the textbox content through analyse() and
    # shows the returned HTML.
    btn.click(fn=analyse, inputs=inp, outputs=out)
    gr.Markdown("<sub>Powered by ModernBERT + Orify Ensemble © 2025</sub>")


# Start the app only when the file is run as a script.
if __name__ == "__main__":
    demo.launch()
|
|