"""Gradio demo app for the "Bayes or Spam?" project.

Downloads a word -> probability table from the Hugging Face Hub and uses it
to score an email with a naive-Bayes style combination of the most
"interesting" word probabilities.
"""

import json

import gradio as gr
import numpy as np
import tensorflow as tf
from huggingface_hub import hf_hub_download

# Fetch the probability table once at import time; hf_hub_download returns
# the local path of the downloaded file.
model_probs_path = hf_hub_download(
    repo_id="tbitai/bayes-enron1-spam", filename="probs.json"
)
# Explicit encoding so the vocabulary loads the same way on every platform.
with open(model_probs_path, encoding="utf-8") as f:
    # Presumably maps token -> spam probability -- verify against the model repo.
    model_probs = json.load(f)

# Key in ``model_probs`` used for tokens that are not in the table.
UNK = '[UNK]'

# Default number of "interesting" words; shared by the slider and the examples.
DEFAULT_INTR_THRESHOLD = 15


def tokenize(text: str) -> list[str]:
    """Split *text* into word tokens with Keras' ``text_to_word_sequence``."""
    return tf.keras.preprocessing.text.text_to_word_sequence(text)


def combine(probs) -> float:
    """Combine individual probabilities into a single probability.

    Computes ``prod(p) / (prod(p) + prod(1 - p))``.

    Args:
        probs: Sequence of probabilities to combine.

    Returns:
        ``0`` if any probability is exactly zero; ``0.5`` if both products
        are zero (e.g. through floating point underflow); otherwise the
        combined value as a float.
    """
    # A zero probability decides the outcome on its own; returning early also
    # avoids a 0 / 0 when another probability is exactly 1.
    if any(p == 0 for p in probs):
        return 0
    prod = np.prod(probs)
    neg_prod = np.prod([1 - p for p in probs])
    if prod + neg_prod == 0:  # Still possible due to floating point arithmetic
        return 0.5  # Assume that prod and neg_prod are equally small
    # Convert the NumPy scalar to a plain Python float for the caller.
    return float(prod / (prod + neg_prod))


def get_interesting_probs(probs, intr_threshold):
    """Return the ``intr_threshold`` probabilities farthest from 0.5.

    The result is ordered by decreasing distance from 0.5.
    """
    by_interest = sorted(probs, key=lambda p: abs(p - 0.5), reverse=True)
    return by_interest[:intr_threshold]


def unbias(p):
    """Map *p* to ``2p / (p + 1)``.

    The UI describes this as correcting "Graham's bias".
    """
    return (2 * p) / (p + 1)


def predict_bayes(text, intr_threshold, unbiased=False):
    """Score *text* with the Bayes probability table.

    Args:
        text: Text to classify.
        intr_threshold: How many of the most interesting word probabilities
            to feed into the combination.
        unbiased: If true, apply :func:`unbias` to every word probability
            before selecting the interesting ones.

    Returns:
        The combined probability (see :func:`combine`).
    """
    # Look the fallback up once rather than once per token.
    unk_prob = model_probs[UNK]
    probs = [model_probs.get(word, unk_prob) for word in tokenize(text)]
    if unbiased:
        probs = [unbias(p) for p in probs]
    # The threshold is used as a slice bound, so it must be an int even if
    # the UI hands over a float.
    interesting_probs = get_interesting_probs(probs, int(intr_threshold))
    return combine(interesting_probs)


BAYES = "Bayes Enron1 spam"
MODELS = [BAYES]


def predict(model, unbiased, intr_threshold, input_txt):
    """Dispatch a prediction request from the UI to the selected model.

    Parameters follow the order of the Interface's input components.
    Returns ``None`` when *model* is not a known model name.
    """
    if model == BAYES:
        return predict_bayes(
            input_txt, unbiased=unbiased, intr_threshold=intr_threshold
        )
    return None


demo = gr.Interface(
    fn=predict,
    inputs=[
        gr.Dropdown(choices=MODELS, value=BAYES, label="Model"),
        gr.Checkbox(label="Unbias", info="Correct Graham's bias?"),
        gr.Slider(
            minimum=1,
            maximum=20,
            step=1,
            value=DEFAULT_INTR_THRESHOLD,
            label="Interestingness threshold",
            info="How many of the most interesting words to select in the probability calculation?",
        ),
        gr.TextArea(label="Email"),
    ],
    outputs=[gr.Number(label="Spam probability")],
    title="Bayes or Spam?",
    description=(
        "Choose and configure your model, and predict if your email is a spam! 📨\n"
        "COMING SOON: NN and LLM models."
    ),
    # One value per input component, in order: model, unbias, threshold, email.
    # Shorter rows would put the email text on the checkbox.
    examples=[
        [BAYES, False, DEFAULT_INTR_THRESHOLD, "enron actuals for june 26, 2000"],
        [BAYES, False, DEFAULT_INTR_THRESHOLD, "stop the aging clock nerissa"],
    ],
    article="This is a demo of the models in the [Bayes or Spam?](https://github.com/tbitai/bayes-or-spam) project.",
)

if __name__ == "__main__":
    demo.launch()