Spaces:

kevin1911
/

LLMdemo

Running

App Files Files Community

kevin1911 commited on Feb 13

Commit

da6e24a

verified ·

1 Parent(s): f9589f3

Update app.py

Browse files

Files changed (1) hide show

app.py +156 -53

app.py CHANGED Viewed

@@ -1,94 +1,197 @@
 import torch
 import gradio as gr
 import plotly.express as px
 from transformers import AutoModel, AutoTokenizer
 ########################################
-# Load Transformer (DistilBERT) with attention
 ########################################
 model_name = "distilbert-base-uncased"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
-# Note: output_attentions=True to extract attention matrices
 model = AutoModel.from_pretrained(model_name, output_attentions=True)
 model.eval()
-def visualize_attention(text, layer=5):
     """
-    1. Tokenize input text.
-    2. Run DistilBERT forward pass to get attention matrices.
-    3. Pick a layer (0..5) and average across attention heads.
-    4. Generate a heatmap (Plotly) of shape (seq_len x seq_len).
-    5. Label axes with tokens (Query vs. Key).
     """
     with torch.no_grad():
         inputs = tokenizer.encode_plus(text, return_tensors="pt")
         outputs = model(**inputs)
-        # outputs.attentions: tuple of shape [num_layers] each => (batch=1, num_heads, seq_len, seq_len)
-        all_attentions = outputs.attentions
-        # DistilBERT has 6 layers => valid indices: 0..5
-        attn_layer = all_attentions[layer].mean(dim=1)  # average across heads => shape: (1, seq_len, seq_len)
-    # Convert to numpy for plotting
-    attn_matrix = attn_layer[0].cpu().numpy()
-    # Get tokens (including special tokens like [CLS], [SEP])
     input_ids = inputs["input_ids"][0]
     tokens = tokenizer.convert_ids_to_tokens(input_ids)
-    # Build a Plotly heatmap
-    fig = px.imshow(
-        attn_matrix,
-        x=tokens,
-        y=tokens,
-        labels={"x": "Key (Being Attended to)", "y": "Query (Focusing)"},
-        color_continuous_scale="Blues",
-        title=f"DistilBERT Attention (Layer {layer})"
-    )
-    fig.update_xaxes(side="top")
-    # Add tooltip: shows row token, column token, and attention weight
-    fig.update_traces(
-        hovertemplate="Query: %{y}<br>Key: %{x}<br>Attention Weight: %{z:.3f}"
     )
-    return fig
-# Short explanation text for the UI
-description_text = """
-## Understanding Transformer Self-Attention
-- **Rows = "Query token"** (the token that is looking at other tokens)
-- **Columns = "Key token"** (the token being looked at)
-- Darker (or higher) color = stronger attention.
-**Transformers** process all tokens in **parallel**, not step-by-step like RNNs.
-Thus, **long-distance dependencies** are easier to capture: any token can directly
-attend to any other token, regardless of distance in the sentence.
-"""
 ########################################
-# Gradio Interface
 ########################################
 with gr.Blocks() as demo:
-    gr.Markdown("# Transformer Self-Attention Visualization (DistilBERT)")
-    gr.Markdown(description_text)
     with gr.Row():
-        text_input = gr.Textbox(
-            label="Enter a sentence",
             value="Transformers handle long-range context in parallel."
         )
-        layer_slider = gr.Slider(
             minimum=0, maximum=5, step=1, value=5,
-            label="DistilBERT Layer (0=lowest, 5=highest)"
         )
-    output_plot = gr.Plot(label="Attention Heatmap")
-    visualize_button = gr.Button("Visualize Attention")
-    visualize_button.click(
-        fn=visualize_attention,
-        inputs=[text_input, layer_slider],
-        outputs=output_plot
     )
 demo.launch()

 import torch
 import gradio as gr
 import plotly.express as px
+import numpy as np
 from transformers import AutoModel, AutoTokenizer
 ########################################
+# 1) Load DistilBERT with attention
 ########################################
 model_name = "distilbert-base-uncased"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModel.from_pretrained(model_name, output_attentions=True)
 model.eval()
+########################################
+# 2) Generate attention analysis
+########################################
+def analyze_attention(text, layer=5, top_k=3, show_heatmap=True):
     """
+    1. Tokenize 'text'.
+    2. Forward pass DistilBERT (output_attentions=True).
+    3. Extract attention from chosen layer (0..5).
+    4. Average across heads => (seq_len, seq_len).
+    5. Optionally create Plotly heatmap (fig_dict).
+    6. Create text summary of top-k focuses for each token.
+    7. Generate an "interpretation" to highlight interesting patterns.
     """
     with torch.no_grad():
         inputs = tokenizer.encode_plus(text, return_tensors="pt")
         outputs = model(**inputs)
+        all_attentions = outputs.attentions  # tuple: (#layers) each => (1, #heads, seq_len, seq_len)
+        # DistilBERT has 6 layers => valid range: 0..5
+        att = all_attentions[layer].mean(dim=1)  # average across heads => shape: (1, seq_len, seq_len)
+    att_matrix = att[0].cpu().numpy()  # (seq_len, seq_len)
     input_ids = inputs["input_ids"][0]
     tokens = tokenizer.convert_ids_to_tokens(input_ids)
+    seq_len = len(tokens)
+    # (Optional) Heatmap
+    fig_dict = None
+    if show_heatmap:
+        fig = px.imshow(
+            att_matrix,
+            x=tokens,
+            y=tokens,
+            labels={"x": "Token Being Looked At", "y": "Token Doing the Looking"},
+            color_continuous_scale="Blues",
+            title=f"DistilBERT Self-Attention (Layer {layer})"
+        )
+        fig.update_xaxes(side="top")
+        fig.update_traces(
+            hovertemplate="Row token: %{y}<br>Column token: %{x}<br>Focus Weight: %{z:.3f}"
+        )
+        fig_dict = fig.to_dict()
+    # Top-K Summary for each row
+    summary_md = "## Top-K Focus for Each Token\n"
+    summary_md += f"Showing the **top {top_k}** tokens each token focuses on.\n\n"
+    for i in range(seq_len):
+        row_token = tokens[i]
+        row_weights = att_matrix[i]
+        sorted_idx = row_weights.argsort()[::-1]
+        top_indices = sorted_idx[:top_k]
+        summary_md += f"**Token '{row_token}'** focuses on:\n"
+        for j in top_indices:
+            col_token = tokens[j]
+            weight = row_weights[j]
+            summary_md += f" - `{col_token}` (weight={weight:.3f})\n"
+        summary_md += "\n"
+    # Generate an additional "interpretation" to highlight patterns
+    interpretation_md = interpret_attention(att_matrix, tokens)
+    # Combine summaries
+    combined_md = summary_md + "\n" + interpretation_md
+    return fig_dict, combined_md
+########################################
+# 3) Interpretation function
+########################################
+def interpret_attention(att_matrix: np.ndarray, tokens: list) -> str:
+    """
+    Provide a short bullet-list interpretation of the attention matrix:
+    - Count how many tokens mostly attend to themselves (diagonal).
+    - Find the global max attention weight (which row->col?), mention tokens involved.
+    - Possibly mention if we see something interesting about distribution.
+    """
+    seq_len = len(tokens)
+    diagonal_focus_count = 0
+    # We'll track the max weight overall
+    max_val = -1.0
+    max_rc = (0, 0)
+    # For each row, check if diagonal is the top focus
+    for i in range(seq_len):
+        row = att_matrix[i]
+        best_j = row.argmax()
+        if best_j == i:
+            diagonal_focus_count += 1
+        # Check global max
+        if row[best_j] > max_val:
+            max_val = row[best_j]
+            max_rc = (i, best_j)
+    # Summaries
+    # 1) Diagonal focus stat
+    diag_msg = f"- **{diagonal_focus_count}/{seq_len} tokens** focus most on themselves (the diagonal)."
+    # 2) Global max
+    i, j = max_rc
+    token_i = tokens[i]
+    token_j = tokens[j]
+    global_msg = f"- The **highest single focus** in the matrix is **{max_val:.3f}**, from token '{token_i}' onto '{token_j}'."
+    # 3) Possibly some quick ratio
+    # For each row, sum of row vs. sum of diagonal
+    # We'll keep it simpler for now
+    interpretation = "## Additional Interpretation\n\n"
+    interpretation += (
+        "Here are some overall patterns in the attention matrix that might help you:\n\n"
     )
+    interpretation += f"{diag_msg}\n"
+    interpretation += f"{global_msg}\n"
+    interpretation += "\n- A strong diagonal means tokens often reference themselves.\n"
+    interpretation += (
+        "- If a token's top focus is another token, that suggests it's referencing or depending on that other token.\n"
+    )
+    return interpretation
 ########################################
+# 4) Gradio UI
 ########################################
+description_md = """
+# DistilBERT Self-Attention with Extra Interpretation
+**Instructions:**
+1. Type your text into the box.
+2. Choose which **layer** of DistilBERT to visualize. (Layers range 0..5).
+3. Decide how many top tokens you want listed for each token (Top-K).
+4. (Optional) Check "Show Heatmap" to see the matrix. If it's too overwhelming, uncheck and just see the summary.
+**Reading the Heatmap**:
+- **Rows** = tokens doing the looking (focus).
+- **Columns** = tokens being looked at.
+- **Color intensity** = how strongly the row token focuses on the column token.
+Below the heatmap, you'll see:
+- A **Top-K focus** summary for each token.
+- An **interpretation** bullet list that highlights interesting overall patterns.
+"""
+def run_demo(text, layer, top_k, show_heatmap):
+    fig_dict, summary_md = analyze_attention(text, layer, top_k, show_heatmap)
+    return fig_dict, summary_md
 with gr.Blocks() as demo:
+    gr.Markdown(description_md)
     with gr.Row():
+        text_in = gr.Textbox(
+            label="Enter text",
             value="Transformers handle long-range context in parallel."
         )
+        layer_in = gr.Slider(
             minimum=0, maximum=5, step=1, value=5,
+            label="DistilBERT Layer"
         )
+        topk_in = gr.Slider(
+            minimum=1, maximum=6, step=1, value=3,
+            label="Top-K Focus"
+        )
+    show_heatmap_check = gr.Checkbox(
+        label="Show Heatmap?",
+        value=True
+    )
+    run_btn = gr.Button("Analyze Attention")
+    out_plot = gr.Plot(label="Attention Heatmap")
+    out_summary = gr.Markdown(label="Attention Summaries & Interpretation")
+    run_btn.click(
+        fn=run_demo,
+        inputs=[text_in, layer_in, topk_in, show_heatmap_check],
+        outputs=[out_plot, out_summary]
     )
 demo.launch()