from transformers import AutoTokenizer
from flask import Flask, request, render_template_string, jsonify
import hashlib
import random
import math
import json
import io
import os

app = Flask(__name__)

# Set maximum content length to 100MB to handle larger files
app.config['MAX_CONTENT_LENGTH'] = 100 * 1024 * 1024

# Create upload folder if it doesn't exist
UPLOAD_FOLDER = '/tmp/tokenizer_uploads'
if not os.path.exists(UPLOAD_FOLDER):
    os.makedirs(UPLOAD_FOLDER)
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER

# Predefined tokenizer models with aliases
TOKENIZER_MODELS = {
    'mistral-small': {
        'name': 'mistralai/Mistral-Small-24B-Instruct-2501',
        'alias': 'Mistral Small 3'
    },
    'gemma3-27b': {
        'name': 'google/gemma-3-27b-it',
        'alias': 'Gemma 3 27B'
    },
    'deepseek-r1': {
        'name': 'deepseek-ai/DeepSeek-R1',
        'alias': 'Deepseek R1'
    },
    'qwen_25_72b': {
        'name': 'Qwen/Qwen2.5-72B-Instruct',
        'alias': 'QWQ 32B'
    },
    'llama_33': {
        'name': 'unsloth/Llama-3.3-70B-Instruct-bnb-4bit',
        'alias': 'Llama 3.3 70B'
    },
    'gemma2_2b': {
        'name': 'google/gemma-2-2b-it',
        'alias': 'Gemma 2 2B'
    },
    'bert-large-uncased': {
        'name': 'google-bert/bert-large-uncased',
        'alias': 'Bert Large Uncased'
    },
    'gpt2': {
        'name': 'openai-community/gpt2',
        'alias': 'GPT-2'
    }
}

# Initialize tokenizers dict
tokenizers = {}

def load_tokenizer(model_id):
    """Load tokenizer if not already loaded."""
    if model_id not in tokenizers:
        tokenizers[model_id] = AutoTokenizer.from_pretrained(TOKENIZER_MODELS[model_id]['name'])
    return tokenizers[model_id]

def get_varied_color(token: str) -> dict:
    """Generate vibrant colors with HSL for better visual distinction."""
    token_hash = hashlib.md5(token.encode()).hexdigest()
    hue = int(token_hash[:3], 16) % 360
    saturation = 70 + (int(token_hash[3:5], 16) % 20)
    lightness = 80 + (int(token_hash[5:7], 16) % 10)
    text_lightness = 20 if lightness > 50 else 90
    return {
        'background': f'hsl({hue}, {saturation}%, {lightness}%)',
        'text': f'hsl({hue}, {saturation}%, {text_lightness}%)'
    }

def fix_token(token: str) -> str:
    """Fix token for display with improved space visualization."""
    if token.startswith('Ġ'):
        space_count = token.count('Ġ')
        return '·' * space_count + token[space_count:]
    return token

def get_token_stats(tokens: list, original_text: str) -> dict:
    """Calculate enhanced statistics about the tokens."""
    if not tokens:
        return {}

    total_tokens = len(tokens)
    unique_tokens = len(set(tokens))
    avg_length = sum(len(t) for t in tokens) / total_tokens
    compression_ratio = len(original_text) / total_tokens

    # Token type analysis
    space_tokens = sum(1 for t in tokens if t.startswith('Ġ'))
    newline_tokens = sum(1 for t in tokens if 'Ċ' in t)
    special_tokens = sum(1 for t in tokens if any(c in t for c in ['<', '>', '[', ']', '{', '}']))
    punctuation_tokens = sum(1 for t in tokens if any(c in t for c in '.,!?;:()'))

    # Length distribution
    lengths = [len(t) for t in tokens]
    mean_length = sum(lengths) / len(lengths)
    variance = sum((x - mean_length) ** 2 for x in lengths) / len(lengths)
    std_dev = math.sqrt(variance)

    return {
        'basic_stats': {
            'total_tokens': total_tokens,
            'unique_tokens': unique_tokens,
            'compression_ratio': round(compression_ratio, 2),
            'space_tokens': space_tokens,
            'newline_tokens': newline_tokens,
            'special_tokens': special_tokens,
            'punctuation_tokens': punctuation_tokens,
            'unique_percentage': round(unique_tokens / total_tokens * 100, 1)
        },
        'length_stats': {
            'avg_length': round(avg_length, 2),
            'std_dev': round(std_dev, 2),
            'min_length': min(lengths),
            'max_length': max(lengths),
            'median_length': sorted(lengths)[len(lengths) // 2]
        }
    }
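# --- Illustrative sketch (an assumption for documentation, not part of the app flow) ---
# A minimal way to eyeball the helpers above from a Python shell, assuming the
# 'gpt2' entry in TOKENIZER_MODELS is available locally or downloadable. Nothing
# here runs at import time; call _demo_helpers() manually if desired.
def _demo_helpers(sample: str = "Hello world, hello tokens!") -> None:
    """Tokenize a short sample with GPT-2 and print what each helper produces."""
    tok = load_tokenizer('gpt2')
    tokens = tok.tokenize(sample)
    print(tokens)                              # e.g. ['Hello', 'Ġworld', ',', ...]
    print([fix_token(t) for t in tokens])      # leading 'Ġ' rendered as '·'
    print(get_token_stats(tokens, sample)['basic_stats'])
# ----------------------------------------------------------------------------------------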
def process_text(text: str, model_id: str, is_full_file: bool = False, file_path: str = None) -> dict:
    """Process text and return tokenization data."""
    tokenizer = load_tokenizer(model_id)

    # For file uploads, read only preview from file but process full file for stats
    if file_path and is_full_file:
        # Read the preview for display
        with open(file_path, 'r', errors='replace') as f:
            preview_text = f.read(8096)

        # Tokenize preview for display
        preview_tokens = tokenizer.tokenize(preview_text)
        display_tokens = preview_tokens[:50000]

        # Process full file for stats in chunks to avoid memory issues
        total_tokens = []
        token_set = set()
        total_length = 0
        chunk_size = 1024 * 1024  # 1MB chunks

        with open(file_path, 'r', errors='replace') as f:
            while True:
                chunk = f.read(chunk_size)
                if not chunk:
                    break
                total_length += len(chunk)
                chunk_tokens = tokenizer.tokenize(chunk)
                total_tokens.extend(chunk_tokens)
                token_set.update(chunk_tokens)

        # Calculate stats
        stats = get_token_stats(total_tokens, ' ' * total_length)  # Approximation for original text
    else:
        # Standard processing for normal text input
        all_tokens = tokenizer.tokenize(text)
        total_token_count = len(all_tokens)

        # For display: if it's a preview, only take first 8096 chars
        preview_text = text[:8096] if is_full_file else text
        preview_tokens = tokenizer.tokenize(preview_text)
        display_tokens = preview_tokens[:50000]

        # Always use full text for stats
        stats = get_token_stats(all_tokens, text)

    # Format tokens for display
    token_data = []
    for idx, token in enumerate(display_tokens):
        colors = get_varied_color(token)
        fixed_token = fix_token(token)
        # Compute the numerical token ID from the tokenizer
        token_id = tokenizer.convert_tokens_to_ids(token)
        token_data.append({
            'original': token,
            'display': fixed_token[:-1] if fixed_token.endswith('Ċ') else fixed_token,
            'colors': colors,
            'newline': fixed_token.endswith('Ċ'),
            'token_id': token_id,
            'token_index': idx
        })

    # Use the appropriate token count based on processing method
    total_token_count = len(total_tokens) if file_path and is_full_file else len(all_tokens)

    return {
        'tokens': token_data,
        'stats': stats,
        'display_limit_reached': total_token_count > 50000 and not is_full_file,
        'total_tokens': total_token_count,
        'is_full_file': is_full_file,
        'preview_only': is_full_file
    }

# HTML template with enhanced modern styling
HTML_TEMPLATE = """
Drop your file here
Advanced tokenization analysis and visualization