import gradio as gr
import json
import time
import requests
from groq import Groq
import google.generativeai as genai
from datetime import datetime, timedelta
import pytz
from playwright.async_api import async_playwright
import asyncio
import random
from fake_useragent import UserAgent
from urllib.parse import urlparse, urljoin
from tenacity import retry, stop_after_attempt, wait_exponential
import os
import subprocess

# Install Playwright and browsers
# subprocess.run(["playwright", "install"], check=True)
# subprocess.run(["playwright", "install-deps"], check=True)
# os.system("apt-get update")

# Install the system libraries required by Playwright's Chromium
os.system(
    "apt-get install -y libnss3 libnspr4 libatk1.0-0 libatk-bridge2.0-0 libcups2 "
    "libxcomposite1 libxdamage1 libatspi2.0-0 libxrandr2 libgbm1 libpango-1.0-0 "
    "libasound2 libxshmfence1 libwayland-server0 libwayland-client0 "
    "libgdk-pixbuf2.0-0"
)

# Install Playwright browsers
# subprocess.run(["sudo", "playwright", "install"], check=True)
# Install Playwright dependencies
# subprocess.run(["sudo", "playwright", "install-deps"], check=True)
# print("Playwright and its dependencies have been installed successfully!")

# Constants
CHAPTERS_FILE = 'scraped_chapters.json'
SPLIT_CHAPTERS_FILE = 'split_scraped_chapters.json'
TRANSLATIONS_FILE = 'chapter_translated.json'
GLOSSARY_FILE = 'chapter_glossary.json'

ua = UserAgent()


# Function to scrape chapters from xbanxia
async def scrape_xbanxia(first_chapter_url, final_url=None):
    async with async_playwright() as p:
        browser = await p.chromium.launch()
        context = await browser.new_context(user_agent=ua.random)
        page = await context.new_page()
        try:
            page = await fetch_page(page, first_chapter_url)
            chapters = []
            next_url = first_chapter_url
            chapter_count = 0
            while next_url and (not final_url or next_url != final_url):
                try:
                    current_url = next_url
                    # Rotate the user agent every 5 chapters
                    if chapter_count % 5 == 0:
                        await context.set_extra_http_headers({"User-Agent": ua.random})
                    page = await fetch_page(page, current_url)

                    # Wait for content to load
                    await page.wait_for_selector('#nr_title', state='visible', timeout=60000)
                    await page.wait_for_selector('#nr1', state='visible', timeout=60000)

                    # Extract title
                    title_element = await page.query_selector('#nr_title')
                    title = await title_element.inner_text() if title_element else None

                    # Extract content
                    content_element = await page.query_selector('#nr1')
                    content = await content_element.inner_text() if content_element else None

                    # Extract next URL
                    next_link = await page.query_selector('.nav2 .next a')
                    next_url = await next_link.get_attribute('href') if next_link else None
                    if next_url and not next_url.startswith('http'):
                        base_url = '/'.join(first_chapter_url.split('/')[:3])
                        next_url = base_url + next_url

                    if title and content:
                        # Clean up the content
                        content_lines = content.split('\n')
                        clean_content = '\n'.join(
                            line.strip() for line in content_lines
                            if line.strip() and not line.strip().startswith('第')
                        )
                        chapters.append({
                            'title': title.strip(),
                            'content': clean_content,
                            'url': current_url  # URL of the chapter that was just scraped
                        })
                        print(f"Scraped chapter {chapter_count + 1}: {title}")
                        chapter_count += 1

                    # Random delay between requests
                    await asyncio.sleep(random.uniform(2, 5))
                except Exception as e:
                    print(f"Error scraping chapter at {next_url}: {str(e)}")
                    await asyncio.sleep(60)  # Wait for 1 minute before retrying

            await browser.close()
            return chapters
        except Exception as e:
            print(f"An error occurred during scraping: {str(e)}")
            await browser.close()
            return None


# Function to scrape chapters from 69shuba.cx
async def scrape_69shu(first_chapter_url, final_url=None):
    async with async_playwright() as p:
        browser = await p.chromium.launch()
        context = await browser.new_context(user_agent=ua.random)
        page = await context.new_page()
        try:
            # Navigate to the first chapter
            page = await fetch_page(page, first_chapter_url)
            chapters = []
            next_url = first_chapter_url
            chapter_count = 0
            while next_url and (not final_url or next_url != final_url):
                try:
                    current_url = next_url
                    # Change the user agent every 5 chapters
                    if chapter_count % 5 == 0:
                        await context.set_extra_http_headers({"User-Agent": ua.random})
                    page = await fetch_page(page, current_url)
                    await page.wait_for_selector('.txtnav', state='visible', timeout=60000)

                    # Extract title
                    title_element = await page.query_selector('.txtnav h1')
                    title = await title_element.inner_text() if title_element else None
                    if not title:
                        title_element = await page.query_selector('.txtnav')
                        if title_element:
                            title_text = await title_element.inner_text()
                            title = title_text.split('\n')[0].strip()

                    # Extract content
                    content_element = await page.query_selector('.txtnav')
                    content = await content_element.inner_text() if content_element else None

                    # Extract next URL
                    next_link = await page.query_selector('.page1 a:nth-child(4)')
                    next_url = await next_link.get_attribute('href') if next_link else None

                    if title and content:
                        # Clean up the content
                        content_lines = content.split('\n')
                        clean_content = '\n'.join(
                            line.strip() for line in content_lines
                            if line.strip() and not line.strip().startswith('Chapter')
                        )
                        chapters.append({
                            'title': title,
                            'content': clean_content,
                            'url': current_url  # URL of the chapter that was just scraped
                        })
                        print(f"Scraped chapter {chapter_count + 1}: {title}")
                        chapter_count += 1

                    # Add a random delay between requests
                    await asyncio.sleep(random.uniform(2, 5))
                except Exception as e:
                    print(f"Error scraping chapter at {next_url}: {str(e)}")
                    await asyncio.sleep(60)  # Wait for 1 minute before trying the next chapter

            await browser.close()
            return chapters
        except Exception as e:
            print(f"An error occurred during scraping: {str(e)}")
            await browser.close()
            return None


# Function to fetch a page, retrying up to 3 times with exponential backoff
@retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
async def fetch_page(page, url):
    await page.goto(url)
    await page.wait_for_load_state('networkidle')
    return page


# Function to scrape chapters based on the domain
async def scrape_task(first_chapter_url, final_url=None):
    """Scrape chapters and save them to a JSON file."""
    domain = urlparse(first_chapter_url).netloc
    if 'xbanxia' in domain:
        result = await scrape_xbanxia(first_chapter_url, final_url)
    elif '69shuba.cx' in domain:
        result = await scrape_69shu(first_chapter_url, final_url)
    else:
        print(f"Unsupported domain: {domain}")
        return

    if result:
        with open(CHAPTERS_FILE, 'w', encoding='utf-8') as f:
            json.dump(result, f, ensure_ascii=False, indent=2)
        print(f'Scraping completed. Data saved to {CHAPTERS_FILE}')
    else:
        print('Scraping failed or was interrupted.')


# Function to split long chapters
def split_long_chapter(title, content, max_length=2000):
    """
    Split a long chapter into multiple parts while preserving paragraph and sentence integrity.
    Splits occur at a newline or at a sentence-ending symbol (。).
""" # Count only Chinese characters for length check chinese_char_count = sum(1 for char in content if '\u4e00' <= char <= '\u9fff') if chinese_char_count <= max_length: return [{"title": title, "content": content}] parts = [] current_part = [] current_chinese_count = 0 part_number = 1 # First split by paragraphs (newlines) paragraphs = content.split('\n') for paragraph in paragraphs: if not paragraph.strip(): continue # Split paragraph into sentences sentences = paragraph.split('。') sentences = [s.strip() + '。' for s in sentences if s.strip()] for sentence in sentences: sentence_chinese_count = sum(1 for char in sentence if '\u4e00' <= char <= '\u9fff') # If adding this sentence would exceed the limit if current_chinese_count + sentence_chinese_count > max_length and current_part: # Save current part part_content = '\n'.join(current_part) parts.append({ "title": f"{title} Part {part_number}", "content": part_content }) # Start new part current_part = [sentence] current_chinese_count = sentence_chinese_count part_number += 1 else: current_part.append(sentence) current_chinese_count += sentence_chinese_count # Save the last part if there's anything remaining if current_part: part_content = '\n'.join(current_part) parts.append({ "title": f"{title} Part {part_number}", "content": part_content }) return parts # Function to process chapters def process_chapters(input_file, output_file, max_length=5000): """ Process chapters from an input JSON file, splitting long chapters if necessary, and save the result to an output JSON file. """ try: # Check if the input file exists if not os.path.exists(input_file): raise FileNotFoundError(f"Input file '{input_file}' not found. Please ensure the scraping process runs first.") with open(input_file, 'r', encoding='utf-8') as f: chapters = json.load(f) processed_chapters = [] for chapter in chapters: title = chapter['title'] content = chapter['content'] split_chapters = split_long_chapter(title, content, max_length) processed_chapters.extend(split_chapters) with open(output_file, 'w', encoding='utf-8') as f: json.dump(processed_chapters, f, ensure_ascii=False, indent=2) return len(processed_chapters) except Exception as e: print(f"Error processing chapters: {str(e)}") raise def create_glossary(gemini_api_key, groq_api_key=None): """Create a glossary from random chapters using Groq or Gemini API.""" with open(SPLIT_CHAPTERS_FILE, 'r', encoding='utf-8') as f: book_data = json.load(f) # book_data is a list of chapters # Select 20 random chapters from the first 100 chapters random_chapters = random.sample(book_data, min(2, len(book_data))) preliminary_glossary = [] for i, chapter in enumerate(random_chapters): max_retries = 3 retry_count = 0 while retry_count < max_retries: try: prompt = f"""Analyze the following Chinese web novel chapter and create a glossary of 5 important terms or names. Each entry should include the Chinese term and its English equivalent or explanation. Translate character names, locations names, unique concepts, cultivation levels, power levels, power techniques, or culturally specific terms to English. The target audience are people from USA that don't know much about Chinese language and culture. Very important Note: Only Use Pinyin for Character's Name. 
Chinese chapter: {chapter['content']}

Create a glossary of 5 terms in the following format:
Chinese Term: English Equivalent

for example:
朱士久 : Zhu Shijiu
"""
                if groq_api_key:
                    # Use the Groq API if a key is provided
                    client = Groq(api_key=groq_api_key)
                    chat_completion = client.chat.completions.create(
                        messages=[{"role": "user", "content": prompt}],
                        model="llama3-70b-8192",
                        timeout=30
                    )
                    chapter_glossary = chat_completion.choices[0].message.content
                else:
                    # Fall back to the Gemini API if no Groq key is provided
                    genai.configure(api_key=gemini_api_key)
                    gemini_model = genai.GenerativeModel(
                        model_name="gemini-1.5-flash",
                        generation_config={
                            "temperature": 1,
                            "top_p": 0.95,
                            "top_k": 64,
                            "max_output_tokens": 8192,
                        }
                    )
                    gemini_response = gemini_model.generate_content(prompt)
                    chapter_glossary = gemini_response.text

                preliminary_glossary.extend(chapter_glossary.split('\n'))
                print(f"Created glossary entries for chapter: {chapter['title']}")
                break
            except Exception as e:
                retry_count += 1
                if retry_count < max_retries:
                    print(f"Error processing chapter {chapter['title']}: {str(e)}")
                    print(f"Retrying in 60 seconds... (Attempt {retry_count + 1} of {max_retries})")
                    time.sleep(60)
                else:
                    print(f"Failed to process chapter {chapter['title']} after {max_retries} attempts: {str(e)}")

        time.sleep(5)  # Pause between chapters

    # Refine the glossary
    refine_prompt = """Refine the following glossary for a Chinese web novel. Remove duplicates, redundant entries, and irrelevant words. Ensure consistency in naming and explanations. Provide the output in JSON format.

Preliminary Glossary:
{}

Retain people's names in Pinyin format (e.g., Chen Jingle), but fully translate all other terms, phrases, and concepts into English. Avoid using Pinyin for non-name elements to ensure clarity and natural flow for English readers.

Provide the refined glossary in the following format:
Chinese Characters: English Equivalent (No Explanations)

for example:
朱士久 : Zhu Shijiu
白家 : Bai Family
成长系统 : Growth System (not "Chengzhang Xitong")
""".format('\n'.join(preliminary_glossary))

    try:
        if groq_api_key:
            # Use the Groq API for refinement if a key is provided
            client = Groq(api_key=groq_api_key)
            chat_completion = client.chat.completions.create(
                messages=[{"role": "user", "content": refine_prompt}],
                model="llama3-70b-8192",
                timeout=60
            )
            refined_glossary = chat_completion.choices[0].message.content
        else:
            # Fall back to the Gemini API for refinement (reuses the model configured in the loop above)
            gemini_response = gemini_model.generate_content(refine_prompt)
            refined_glossary = gemini_response.text

        # Save the refined glossary
        with open(GLOSSARY_FILE, 'w', encoding='utf-8') as f:
            json.dump(refined_glossary.split('\n'), f, ensure_ascii=False, indent=2)
        print(f'Glossary creation completed. Glossary saved to {GLOSSARY_FILE}')
    except Exception as e:
        print(f"Error refining glossary: {str(e)}")
        raise


# Function to translate chapters
def translate_task(gemini_api_key, groq_api_key):
    # Configure Gemini
    genai.configure(api_key=gemini_api_key)
    gemini_model = genai.GenerativeModel(
        model_name="gemini-1.5-flash",
        generation_config={
            "temperature": 1,
            "top_p": 0.95,
            "top_k": 64,
            "max_output_tokens": 8192,
        }
    )

    # Load data and configuration
    with open(SPLIT_CHAPTERS_FILE, 'r', encoding='utf-8') as f:
        book_data = json.load(f)
    with open(GLOSSARY_FILE, 'r', encoding='utf-8') as f:
        glossary = json.load(f)
    formatted_glossary = "\n".join(glossary)

    # Configure Groq
    groq_client = Groq(api_key=groq_api_key) if groq_api_key else None

    translations = []
    for i, chapter in enumerate(book_data):
        prompt = f"""Translate the following Chinese web novel chapter to English.
Maintain the original tone and style of the novel. Preserve any cultural references or idioms, providing brief explanations in parentheses if necessary. If paragraphs are stuck together, split them.

Retain people's names in Pinyin format (e.g., Chen Jingle), but fully translate all other terms, phrases, and concepts into English. Avoid using Pinyin for non-name elements to ensure clarity and natural flow for English readers. You should translate every Chinese character to English. The chapter should be fully translated.

Glossary:
{formatted_glossary}

Chinese chapter:
{chapter['content']}

Note: No introductory sentences nor concluding sentences. Just directly provide the translation.

Translate the above text to English, using the glossary for consistent translations of key terms:"""

        translation = None

        # Try Gemini first
        print("Trying Gemini...")
        for attempt in range(2):
            try:
                gemini_response = gemini_model.generate_content(prompt)
                translation = gemini_response.text
                break
            except Exception as e:
                print(f"Gemini error (attempt {attempt + 1}): {str(e)}")
                if attempt == 1:
                    print("Gemini failed. Falling back to Groq LLaMA model")
                else:
                    time.sleep(30)

        # If Gemini failed, try Groq
        if not translation and groq_client:
            for attempt in range(2):
                try:
                    chat_completion = groq_client.chat.completions.create(
                        messages=[{"role": "user", "content": prompt}],
                        model="llama3-70b-8192",
                        timeout=30
                    )
                    translation = chat_completion.choices[0].message.content
                    break
                except Exception as e:
                    print(f"Groq error (attempt {attempt + 1}): {str(e)}")
                    if attempt == 1:
                        print(f"Failed to translate Chapter {i + 1} after all attempts")
                        translation = f"TRANSLATION FAILED: {chapter['title']}"
                    else:
                        time.sleep(30)

        # If both providers failed (or no Groq key was supplied), record the failure
        if not translation:
            translation = f"TRANSLATION FAILED: {chapter['title']}"

        # Add the complete chapter translation to the results
        translations.append({
            'title': chapter['title'],
            'translated_content': translation
        })
        print(f"Completed translation of Chapter {i + 1}")
        print("First 500 characters of translation:")
        print(translation[:500] + "...")
        print('=======================================')
        time.sleep(5)  # Sleep between requests

    # Save all translations
    with open(TRANSLATIONS_FILE, 'w', encoding='utf-8') as f:
        json.dump(translations, f, ensure_ascii=False, indent=2)
    print(f'Translation completed. Translations saved to {TRANSLATIONS_FILE}')


# Full pipeline invoked by the Gradio interface
def process_novel(first_chapter_url, final_url, novel_name, gemini_api_key, groq_api_key):
    # Scrape chapters
    asyncio.run(scrape_task(first_chapter_url, final_url))
    # Process chapters (split long chapters)
    process_chapters(CHAPTERS_FILE, SPLIT_CHAPTERS_FILE)
    # Build the glossary
    create_glossary(gemini_api_key, groq_api_key)
    # Translate chapters
    translate_task(gemini_api_key, groq_api_key)
    return "Scraping, Processing, and Translation Completed!"


# Gradio Interface
iface = gr.Interface(
    fn=process_novel,
    inputs=[
        gr.Textbox(label="First Chapter URL"),
        gr.Textbox(label="Final Chapter URL (optional)"),
        gr.Textbox(label="Novel Name"),
        gr.Textbox(label="Gemini API Key"),
        gr.Textbox(label="Groq API Key (optional)"),
    ],
    outputs="text",
    title="Novel Scraper and Translator",
    description="Input the first chapter URL, final chapter URL (optional), novel name, and API keys to scrape and translate the novel."
)

iface.launch()
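# Local setup sketch (an assumption about the runtime environment, not part of the deployed app):
# the script expects the imported packages to be installed and a Playwright Chromium browser
# to be available, e.g.
#   pip install gradio groq google-generativeai playwright fake-useragent tenacity pytz requests
#   playwright install chromium
# The os.system() call at the top only installs the system libraries that Chromium needs.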