Spaces:

Sleepyriizi
/

Orify-Text-Detection

Running

App Files Files Community

Orify-Text-Detection / app.py

Sleepyriizi

Update app.py

f1ccd02 verified about 1 month ago

raw

history blame contribute delete

6.49 kB

	"""
	Orify Text Detector – full-width UI + explicit verdict (Zero-GPU ready)

	• Three ModernBERT-base checkpoints (soft-vote)
	• Per-line highlights, hover tool-tips, and a big verdict banner
	• Weights auto-downloaded & cached
	"""

	# ── Imports ─────────────────────────────────────────────────────────────
	from pathlib import Path
	import os, re, html, typing
	import torch, gradio as gr
	from transformers import AutoTokenizer, AutoModelForSequenceClassification
	from huggingface_hub import hf_hub_download
	import spaces

	# ── Robust torch.compile shim (turns compilation into a no-op) ──────────
	# Replace torch.compile with a pass-through so nothing in this process is
	# compiled, and set the TORCHINDUCTOR_DISABLED environment variable as well.
	if hasattr(torch, "compile"):
	    def _no_compile(model: typing.Any = None, *args, **kwargs):
	        """Stand-in for ``torch.compile`` that performs no compilation.

	        Supports both call styles:

	        * ``torch.compile(model, ...)`` returns ``model`` unchanged.
	        * ``@torch.compile(...)`` (no callable given) returns an identity
	          decorator.

	        Extra positional and keyword arguments are accepted and ignored.
	        """
	        if callable(model):
	            return model
	        return lambda fn: fn

	    torch.compile = _no_compile

	os.environ["TORCHINDUCTOR_DISABLED"] = "1"

	# ── Config ──────────────────────────────────────────────────────────────
	# Run on CUDA when torch reports it as available, otherwise on the CPU.
	DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

	# Hub repository holding the checkpoints, and the files to fetch from it.
	# Each key of FILE_MAP is identical to its value.
	WEIGHT_REPO = "Sleepyriizi/Orify-Text-Detection-Weights"
	FILE_MAP = {
	    fname: fname
	    for fname in ("ensamble_1", "ensamble_2.bin", "ensamble_3")
	}

	BASE_MODEL = "answerdotai/ModernBERT-base"
	NUM_LABELS = 41

	# Class index -> label name, in list order; "human" sits at index 24.
	_LABEL_NAMES = (
	    "13B", "30B", "65B", "7B", "GLM130B", "bloom_7b", "bloomz", "cohere",
	    "davinci", "dolly", "dolly-v2-12b", "flan_t5_base", "flan_t5_large",
	    "flan_t5_small", "flan_t5_xl", "flan_t5_xxl", "gemma-7b-it",
	    "gemma2-9b-it", "gpt-3.5-turbo", "gpt-35", "gpt-4", "gpt-4o", "gpt-j",
	    "gpt-neox", "human", "llama3-70b", "llama3-8b", "mixtral-8x7b",
	    "opt-1.3b", "opt-125m", "opt-13b", "opt-2.7b", "opt-30b", "opt-350m",
	    "opt-6.7b", "opt-iml-30b", "opt-iml-max-1.3b", "t0-11b", "t0-3b",
	    "text-davinci-002", "text-davinci-003",
	)
	LABELS = dict(enumerate(_LABEL_NAMES))

	# ── CSS (full-width layout) ─────────────────────────────────────────────
	# Stylesheet handed to gr.Blocks(css=CSS). It declares the colour/border
	# custom properties (--ai, --human, --border, --radius), sizes the inputs
	# and the .output-box panel, and defines the .ai-line / .human-line
	# highlight classes and the .prob-tooltip hover cue used in the HTML that
	# analyse() returns.
	CSS = """
	:root{--ai:#ff4d4f;--human:#52c41a;--border:2px solid var(--ai);--radius:10px}
	body{font-family:'Roboto Mono',monospace;margin:0;padding:32px;box-sizing:border-box}
	input,textarea,.output-box{width:100%;box-sizing:border-box}
	textarea{padding:16px;font-size:1rem;border:var(--border);border-radius:var(--radius)}
	.output-box{min-height:200px;border:var(--border);border-radius:var(--radius);padding:16px}
	.ai-line{background:rgba(255,77,79,.12);padding:2px 4px;border-radius:4px}
	.human-line{background:rgba(82,196,26,.12);padding:2px 4px;border-radius:4px}
	.prob-tooltip{cursor:help;border-bottom:1px dotted currentColor}
	"""

	# ── Load weights & models - one time ─────────────────────────────────────
	# Fetch every file named in FILE_MAP from WEIGHT_REPO; local_paths maps each
	# FILE_MAP key to the local path returned by hf_hub_download.
	print("🔄 Downloading weights …")
	local_paths = {
	    a: hf_hub_download(WEIGHT_REPO, f, resume_download=True)
	    for a, f in FILE_MAP.items()
	}

	# Build one classifier per downloaded checkpoint: instantiate BASE_MODEL with
	# a NUM_LABELS-way head, load the checkpoint's state dict into it, move it to
	# DEVICE and switch it to eval mode. A single tokenizer is shared by all.
	print("🧩 Initialising models …")
	tok = AutoTokenizer.from_pretrained(BASE_MODEL)
	models = []
	for p in local_paths.values():
	    m = AutoModelForSequenceClassification.from_pretrained(
	        BASE_MODEL, num_labels=NUM_LABELS
	    )
	    # weights_only=True restricts unpickling to tensors and plain containers,
	    # so a downloaded file cannot execute arbitrary code while being loaded.
	    state_dict = torch.load(p, map_location=DEVICE, weights_only=True)
	    m.load_state_dict(state_dict)
	    m.to(DEVICE).eval()
	    models.append(m)

	# ── Helpers ─────────────────────────────────────────────────────────────
	def tidy(txt: str) -> str:
	    """Normalise pasted text into paragraphs separated by one blank line.

	    Steps, in order:

	    1. Convert ``\\r\\n`` and ``\\r`` line endings to ``\\n``.
	    2. Collapse any run of blank (or whitespace-only) lines to exactly one
	       blank line.
	    3. Re-join words hyphenated across a line break (``exam-\\nple`` becomes
	       ``example``).
	    4. Replace every remaining single newline with a space, so a wrapped
	       paragraph becomes one line.
	    5. Collapse runs of spaces/tabs to a single space. This runs after the
	       joining step so that a wrapped line with trailing or leading blanks
	       does not leave a double space at the join.

	    Args:
	        txt: Raw input text.

	    Returns:
	        The cleaned text with leading and trailing whitespace stripped.
	    """
	    txt = txt.replace("\r\n", "\n").replace("\r", "\n")
	    txt = re.sub(r"\n\s*\n+", "\n\n", txt)
	    txt = re.sub(r"(\w+)-\n(\w+)", r"\1\2", txt)
	    txt = re.sub(r"(?<!\n)\n(?!\n)", " ", txt)
	    txt = re.sub(r"[ \t]+", " ", txt)
	    return txt.strip()

	def infer(seg: str):
	    """Score one text segment with the model ensemble (soft vote).

	    The segment is tokenised with ``tok`` (truncation enabled) and run
	    through every model in ``models`` without gradient tracking. The
	    per-model softmax distributions are averaged, the "human" class is
	    zeroed out, and the remaining probability mass is taken as the AI score.

	    Args:
	        seg: The text segment to classify.

	    Returns:
	        A tuple ``(human, ai, top3)`` where ``ai`` is the summed non-human
	        probability as a percentage, ``human`` is ``100 - ai``, and ``top3``
	        lists the names of the three most probable non-human labels.
	    """
	    # Look the index up by name instead of hard-coding its position in LABELS.
	    human_idx = next(i for i, name in LABELS.items() if name == "human")

	    inp = tok(seg, return_tensors="pt", truncation=True, padding=True).to(DEVICE)
	    with torch.no_grad():
	        per_model = [torch.softmax(m(**inp).logits, dim=1) for m in models]
	        # Average over models, then take the only row in the batch.
	        probs = torch.stack(per_model).mean(0)[0]

	    ai_probs = probs.clone()
	    ai_probs[human_idx] = 0
	    ai = ai_probs.sum().item() * 100
	    human = 100 - ai
	    top3 = [LABELS[i] for i in torch.topk(ai_probs, 3).indices.tolist()]
	    return human, ai, top3

	# ── Main analyse fn ─────────────────────────────────────────────────────
	@spaces.GPU
	def analyse(txt: str):
	    """Classify the text line by line and render the result as HTML.

	    The input is cleaned with ``tidy`` and split on newlines. Every non-blank
	    line is scored with ``infer`` and wrapped in a ``<span>`` whose class
	    (``ai-line`` or ``human-line``) reflects the larger score and whose
	    ``title`` carries the percentage (plus the top-3 labels for AI lines).
	    Blank lines become ``<br>``. The verdict banner shows the mean human or
	    AI percentage over the scored lines, whichever is larger (human wins
	    ties).

	    Args:
	        txt: Text entered by the user.

	    Returns:
	        An HTML string: the verdict heading, a rule, and the highlighted
	        lines joined by ``<br>``. For empty or whitespace-only input a short
	        prompt message is returned instead.
	    """
	    # Also covers None, which has no .strip().
	    if not txt or not txt.strip():
	        return "✏️ Please paste or type some text to analyse…"

	    lines = tidy(txt).split("\n")
	    out, h_sum, ai_sum, n = [], 0.0, 0.0, 0
	    for ln in lines:
	        if not ln.strip():
	            out.append("<br>")
	            continue
	        n += 1
	        h, ai, top3 = infer(ln)
	        h_sum += h
	        ai_sum += ai
	        cls = "ai-line" if ai > h else "human-line"
	        tip = f"AI {ai:.2f}% • Top-3: {', '.join(top3)}" if ai > h else f"Human {h:.2f}%"
	        # The user's text is escaped before being embedded in the markup.
	        out.append(f"<span class='{cls} prob-tooltip' title='{tip}'>{html.escape(ln)}</span>")

	    # n >= 1 here: non-blank input always leaves at least one non-blank line.
	    human_avg, ai_avg = h_sum / n, ai_sum / n
	    verdict = (f"<span class='human-line' style='padding:6px 10px;font-weight:bold'>"
	               f"Human-written {human_avg:.2f}%</span>"
	               if human_avg >= ai_avg else
	               f"<span class='ai-line' style='padding:6px 10px;font-weight:bold'>"
	               f"AI-generated {ai_avg:.2f}%</span>")
	    return f"<h3>{verdict}</h3><hr>" + "<br>".join(out)

	# ── Gradio UI ───────────────────────────────────────────────────────────
	# Page layout: heading, usage legend, input box, Analyse button and the HTML
	# output panel. Clicking the button sends the textbox content to analyse()
	# and shows the returned HTML in the panel.
	with gr.Blocks(css=CSS, title="Orify Text Detector") as demo:
	    gr.Markdown("## Orify Text Detector")
	    gr.Markdown(
	        "Paste text, click Analyse.<br>"
	        # Plain "|" here: "\|" is not a valid escape sequence in a normal
	        # Python string literal.
	        "<span class='human-line'>Green</span>=human  |  "
	        "<span class='ai-line'>Red</span>=AI.<br>"
	        "Hover a line to see confidence & top-3 AI models."
	    )
	    inp = gr.Textbox(lines=8, placeholder="Paste text here …")
	    btn = gr.Button("Analyse")
	    out = gr.HTML(elem_classes=["output-box"])
	    btn.click(analyse, inp, out)
	    gr.Markdown("<sub>Powered by ModernBERT + Orify Ensemble © 2025</sub>")

	# Start the Gradio server only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()