# # import gradio as gr
# # import pdfplumber
# # import networkx as nx
# # import pandas as pd
# # import matplotlib.pyplot as plt
# # import plotly.graph_objects as go
# # from transformers import AutoTokenizer
# # from langchain_core.documents import Document
# # from langchain_experimental.graph_transformers import LLMGraphTransformer
# # from langchain_groq import ChatGroq
# # import os

# # # Initialize components
# # scibert_model = "allenai/scibert_scivocab_uncased"
# # tokenizer = AutoTokenizer.from_pretrained(scibert_model)
# # groq_api_key = "<REDACTED_GROQ_API_KEY>"  # NOTE: never commit real API keys; the previously exposed key should be revoked
# # llm = ChatGroq(groq_api_key=groq_api_key, model_name="Gemma2-9b-It")
# # llm_transformer = LLMGraphTransformer(llm=llm)


# # def extract_text_from_pdf(pdf_path):
# #     with pdfplumber.open(pdf_path) as pdf:
# #         extracted_text = "".join([page.extract_text() for page in pdf.pages])
# #     return extracted_text


# # def scibert_chunking(text, chunk_size=256, max_chunks=6):
# #     tokens = tokenizer.tokenize(text)
# #     chunks = [
# #         tokenizer.convert_tokens_to_string(tokens[i : i + chunk_size])
# #         for i in range(0, min(len(tokens), chunk_size * max_chunks), chunk_size)
# #     ]
# #     return chunks


# # def process_text_with_llm(text):
# #     chunks = scibert_chunking(text)
# #     documents = [Document(page_content=chunk) for chunk in chunks]
# #     graph_documents = [
# #         llm_transformer.convert_to_graph_documents([doc])[0] for doc in documents
# #     ]
# #     return graph_documents


# # def build_graph(graph_documents):
# #     graph = nx.DiGraph()
# #     for graph_doc in graph_documents:
# #         for node in graph_doc.nodes:
# #             label = node.properties.get("name", node.id)
# #             graph.add_node(node.id, label=label)
# #         for rel in graph_doc.relationships:
# #             graph.add_edge(rel.source.id, rel.target.id, type=rel.type)
# #     return graph


# # def calculate_average_top_5_pagerank(graph):
# #     pagerank = nx.pagerank(graph)
# #     top_5 = sorted(pagerank.values(), reverse=True)[:5]
# #     return sum(top_5) / len(top_5) if top_5 else 0, pagerank


# # def draw_static_graph(graph, output_path="graph.png"):
# #     plt.figure(figsize=(10, 8))
# #     pos = nx.spring_layout(graph, seed=42)
# #     nx.draw(
# #         graph,
# #         pos,
# #         with_labels=True,
# #         node_size=500,
# #         node_color="lightblue",
# #         font_size=8,
# #         font_weight="bold",
# #         edge_color="gray",
# #     )
# #     plt.title("Static Knowledge Graph")
# #     plt.savefig(output_path)
# #     plt.close()


# # def generate_interactive_plotly_graph(graph, pagerank):
# #     pos = nx.spring_layout(graph, seed=42)  # Generate positions for nodes

# #     edge_x = []
# #     edge_y = []
# #     for edge in graph.edges():
# #         x0, y0 = pos[edge[0]]
# #         x1, y1 = pos[edge[1]]
# #         edge_x.extend([x0, x1, None])
# #         edge_y.extend([y0, y1, None])

# #     edge_trace = go.Scatter(
# #         x=edge_x,
# #         y=edge_y,
# #         line=dict(width=0.5, color="#888"),
# #         hoverinfo="none",
# #         mode="lines",
# #     )

# #     node_x = []
# #     node_y = []
# #     node_text = []
# #     for node in graph.nodes():
# #         x, y = pos[node]
# #         node_x.append(x)
# #         node_y.append(y)

# #         label = graph.nodes[node].get("label", str(node))  # Default to node ID if label is missing
# #         pagerank_score = pagerank.get(node, 0)
# #         node_text.append(f"{label}<br>{pagerank_score:.4f}")

# #     node_trace = go.Scatter(
# #         x=node_x,
# #         y=node_y,
# #         mode="markers+text",
# #         text=node_text,
# #         hoverinfo="text",
# #         marker=dict(
# #             showscale=True,
# #             colorscale="YlGnBu",
# #             size=10,
# #             color=list(pagerank.values()),
# #             colorbar=dict(
# #                 thickness=15,
# #                 title="PageRank",
# #                 xanchor="left",
# #                 titleside="right"
# #             ),
# #         ),
# #     )

# #     fig = go.Figure(data=[edge_trace, node_trace])
# #     fig.update_layout(
# #         showlegend=False,
# #         hovermode="closest",
# #         margin=dict(b=0, l=0, r=0, t=0),
# #         xaxis=dict(showgrid=False, zeroline=False),
# #         yaxis=dict(showgrid=False, zeroline=False),
# #     )

# #     return fig


# # def classify_and_visualize_pdf(pdf_path):
# #     try:
# #         # Step 1: Extract text from the PDF
# #         text = extract_text_from_pdf(pdf_path)

# #         # Step 2: Process text to generate a knowledge graph
# #         graph_documents = process_text_with_llm(text)
# #         graph = build_graph(graph_documents)

# #         # Step 3: Calculate PageRank and classify
# #         avg_top_5_pagerank, pagerank = calculate_average_top_5_pagerank(graph)
# #         classification = "Publishable" if avg_top_5_pagerank >= 0.0526 else "Non-Publishable"

# #         # Step 4: Draw the static graph and save as image
# #         static_graph_path = "knowledge_graph.png"
# #         draw_static_graph(graph, static_graph_path)

# #         # Step 5: Generate the interactive Plotly graph
# #         interactive_fig = generate_interactive_plotly_graph(graph, pagerank)

# #         # Step 6: Prepare formatted result
# #         result_html = f"""
# #         <h3>Classification Result</h3>
# #         <p><strong>Classification:</strong> {classification}</p>
# #         <p><strong>Average Top 5 PageRank:</strong> {avg_top_5_pagerank:.4f}</p>
# #         """

# #         return result_html, static_graph_path, interactive_fig
# #     except Exception as e:
# #         return f"<p style='color:red;'><strong>Error:</strong> {str(e)}</p>", None, None


# # # Gradio app instance
# # with gr.Blocks() as demo:
# #     gr.Markdown(
# #         """
# #         # 📄 Research Paper Classifier with Knowledge Graphs
# #         Upload a PDF research paper, and the app will:
# #         1. **Generate a Static Knowledge Graph**
# #         2. **Generate an Interactive Knowledge Graph** (hover over nodes to see details)
# #         3. **Calculate PageRank and classify the paper as Publishable or Non-Publishable**
# #         """
# #     )

# #     with gr.Row():
# #         pdf_input = gr.File(label="Upload PDF", type="filepath", file_types=[".pdf"])
# #         submit_btn = gr.Button("Classify Paper")

# #     with gr.Row():
# #         result_output = gr.HTML(label="Classification Result")

# #     with gr.Row():
# #         static_graph_output = gr.Image(label="Static Knowledge Graph", type="filepath")
# #         interactive_graph_output = gr.Plot(label="Interactive Knowledge Graph")

# #     submit_btn.click(
# #         fn=classify_and_visualize_pdf,
# #         inputs=pdf_input,
# #         outputs=[result_output, static_graph_output, interactive_graph_output],
# #     )

# # demo.launch()
# import gradio as gr
# import pdfplumber
# import networkx as nx
# import pandas as pd
# import matplotlib.pyplot as plt
# import plotly.graph_objects as go
# from transformers import AutoTokenizer
# from langchain_core.documents import Document
# from langchain_experimental.graph_transformers import LLMGraphTransformer
# from langchain_groq import ChatGroq
# import os

# # Initialize components
# scibert_model = "allenai/scibert_scivocab_uncased"
# tokenizer = AutoTokenizer.from_pretrained(scibert_model)
# groq_api_key = "<REDACTED_GROQ_API_KEY>"  # NOTE: never commit real API keys; the previously exposed key should be revoked
# llm = ChatGroq(groq_api_key=groq_api_key, model_name="Gemma2-9b-It")
# llm_transformer = LLMGraphTransformer(llm=llm)

# def extract_text_from_pdf(pdf_path):
#     with pdfplumber.open(pdf_path) as pdf:
#         extracted_text = "".join([page.extract_text() for page in pdf.pages])
#     return extracted_text

# def scibert_chunking(text, chunk_size=256, max_chunks=6):
#     tokens = tokenizer.tokenize(text)
#     chunks = [
#         tokenizer.convert_tokens_to_string(tokens[i : i + chunk_size])
#         for i in range(0, min(len(tokens), chunk_size * max_chunks), chunk_size)
#     ]
#     return chunks

# def process_text_with_llm(text):
#     chunks = scibert_chunking(text)
#     documents = [Document(page_content=chunk) for chunk in chunks]
#     graph_documents = [
#         llm_transformer.convert_to_graph_documents([doc])[0] for doc in documents
#     ]
#     return graph_documents

# def build_graph(graph_documents):
#     graph = nx.DiGraph()
#     for graph_doc in graph_documents:
#         for node in graph_doc.nodes:
#             label = node.properties.get("name", node.id)
#             graph.add_node(node.id, label=label)
#         for rel in graph_doc.relationships:
#             graph.add_edge(rel.source.id, rel.target.id, type=rel.type)
#     return graph

# def calculate_average_top_5_pagerank(graph):
#     pagerank = nx.pagerank(graph)
#     top_5 = sorted(pagerank.values(), reverse=True)[:5]
#     return sum(top_5) / len(top_5) if top_5 else 0, pagerank

# def draw_static_graph(graph, output_path="graph.png"):
#     plt.figure(figsize=(10, 8))
#     pos = nx.spring_layout(graph, seed=42)
#     nx.draw(
#         graph,
#         pos,
#         with_labels=True,
#         node_size=500,
#         node_color="lightblue",
#         font_size=8,
#         font_weight="bold",
#         edge_color="gray",
#     )
#     plt.title("Static Knowledge Graph")
#     plt.savefig(output_path)
#     plt.close()

# def generate_interactive_plotly_graph(graph, pagerank):
#     pos = nx.spring_layout(graph, seed=42)  # Generate positions for nodes

#     edge_x = []
#     edge_y = []
#     for edge in graph.edges():
#         x0, y0 = pos[edge[0]]
#         x1, y1 = pos[edge[1]]
#         edge_x.extend([x0, x1, None])
#         edge_y.extend([y0, y1, None])

#     edge_trace = go.Scatter(
#         x=edge_x,
#         y=edge_y,
#         line=dict(width=0.5, color="#888"),
#         hoverinfo="none",
#         mode="lines",
#     )

#     node_x = []
#     node_y = []
#     node_text = []
#     for node in graph.nodes():
#         x, y = pos[node]
#         node_x.append(x)
#         node_y.append(y)

#         label = graph.nodes[node].get("label", str(node))  # Default to node ID if label is missing
#         pagerank_score = pagerank.get(node, 0)
#         node_text.append(f"{label}<br>{pagerank_score:.4f}")

#     node_trace = go.Scatter(
#         x=node_x,
#         y=node_y,
#         mode="markers+text",
#         text=node_text,
#         hoverinfo="text",
#         marker=dict(
#             showscale=True,
#             colorscale="YlGnBu",
#             size=10,
#             color=list(pagerank.values()),
#             colorbar=dict(
#                 thickness=15,
#                 title="PageRank",
#                 xanchor="left",
#                 titleside="right"
#             ),
#         ),
#     )

#     fig = go.Figure(data=[edge_trace, node_trace])
#     fig.update_layout(
#         showlegend=False,
#         hovermode="closest",
#         margin=dict(b=0, l=0, r=0, t=0),
#         xaxis=dict(showgrid=False, zeroline=False),
#         yaxis=dict(showgrid=False, zeroline=False),
#     )

#     return fig

# def classify_and_visualize_pdf(pdf_path):
#     try:
#         # Step 1: Extract text from the PDF
#         text = extract_text_from_pdf(pdf_path)

#         # Step 2: Process text to generate a knowledge graph
#         graph_documents = process_text_with_llm(text)
#         graph = build_graph(graph_documents)

#         # Step 3: Calculate PageRank and classify
#         avg_top_5_pagerank, pagerank = calculate_average_top_5_pagerank(graph)
#         classification = "Publishable" if avg_top_5_pagerank >= 0.0526 else "Non-Publishable"

#         # Step 4: Draw the static graph and save as image
#         static_graph_path = "knowledge_graph.png"
#         draw_static_graph(graph, static_graph_path)

#         # Step 5: Generate the interactive Plotly graph
#         interactive_fig = generate_interactive_plotly_graph(graph, pagerank)

#         # Step 6: Prepare formatted result
#         result_html = f"""
#         <h3>Classification Result</h3>
#         <p><strong>Classification:</strong> {classification}</p>
#         <p><strong>Average Top 5 PageRank:</strong> {avg_top_5_pagerank:.4f}</p>
#         """

#         return result_html, static_graph_path, interactive_fig
#     except Exception as e:
#         return f"<p style='color:red;'><strong>Error:</strong> {str(e)}</p>", None, None

# # Gradio app instance
# with gr.Blocks(css=""" 
#     body {background: linear-gradient(to right, #6A11CB, #2575FC); color: white;}
#     .gr-button {background: #34A853; color: white; border-radius: 8px;}
#     .gr-button:hover {background: #2F8A43;}
#     .gr-markdown {font-family: 'Roboto', sans-serif; text-align: center;}
#     .gr-file-upload {border: 2px dashed #fff;}
#     .gr-row {padding: 10px; justify-content: center; align-items: center;}
# """) as demo:
#     gr.Markdown(
#         """
#         # 📄 Research Paper Classifier with Knowledge Graphs
#         Upload a PDF research paper, and the app will:
#         1. **Generate a Static Knowledge Graph**
#         2. **Generate an Interactive Knowledge Graph** (hover over nodes to see details)
#         3. **Calculate PageRank and classify the paper as Publishable or Non-Publishable**
#         """
#     )

#     with gr.Row():
#         pdf_input = gr.File(label="Upload PDF", type="filepath", file_types=[".pdf"], elem_id="upload")
#         submit_btn = gr.Button("Classify Paper", elem_id="submit")

#     with gr.Row():
#         result_output = gr.HTML(label="Classification Result")

#     with gr.Row():
#         static_graph_output = gr.Image(label="Static Knowledge Graph", type="filepath")
#         interactive_graph_output = gr.Plot(label="Interactive Knowledge Graph")

#     submit_btn.click(
#         fn=classify_and_visualize_pdf,
#         inputs=pdf_input,
#         outputs=[result_output, static_graph_output, interactive_graph_output],
#     )

# demo.launch()
import html
import os
import tempfile

import gradio as gr
import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
import pdfplumber
import plotly.graph_objects as go
from langchain_core.documents import Document
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_groq import ChatGroq
from transformers import AutoTokenizer

# Initialize components
# SECURITY: the Groq API key is read from the environment instead of being
# hard-coded. A key that has ever been committed to source control must be
# treated as compromised and revoked.
groq_api_key = os.environ.get("GROQ_API_KEY")
if not groq_api_key:
    # Fail fast with an actionable message before loading the tokenizer.
    raise RuntimeError(
        "GROQ_API_KEY environment variable is not set; "
        "export it before starting the app."
    )

# The SciBERT tokenizer is used by scibert_chunking() to split text into chunks.
scibert_model = "allenai/scibert_scivocab_uncased"
tokenizer = AutoTokenizer.from_pretrained(scibert_model)

# LLM-backed transformer that converts documents into graph documents.
llm = ChatGroq(groq_api_key=groq_api_key, model_name="Gemma2-9b-It")
llm_transformer = LLMGraphTransformer(llm=llm)

def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF as a single string.

    Args:
        pdf_path: Path (or file-like object) accepted by ``pdfplumber.open``.

    Returns:
        The page texts joined with newlines. Pages for which no text could be
        extracted contribute an empty string.
    """
    with pdfplumber.open(pdf_path) as pdf:
        # extract_text() may yield None for a page without extractable text
        # (e.g. a scanned image); "or ''" keeps str.join from raising TypeError.
        page_texts = [page.extract_text() or "" for page in pdf.pages]
    # Join with a newline so the last word of one page is not fused with the
    # first word of the next page.
    return "\n".join(page_texts)

def scibert_chunking(text, chunk_size=256, max_chunks=6):
    """Split ``text`` into at most ``max_chunks`` chunks of ``chunk_size`` tokens.

    The text is tokenized with the module-level ``tokenizer``; each window of
    tokens is converted back to a string. Tokens beyond
    ``chunk_size * max_chunks`` are ignored.

    Args:
        text: Text to split.
        chunk_size: Number of tokens per chunk.
        max_chunks: Upper bound on the number of chunks returned.

    Returns:
        A list of chunk strings (empty when ``text`` yields no tokens).
    """
    all_tokens = tokenizer.tokenize(text)
    # Never read past the token budget, nor past the end of the token list.
    token_budget = min(len(all_tokens), chunk_size * max_chunks)

    pieces = []
    for start in range(0, token_budget, chunk_size):
        window = all_tokens[start : start + chunk_size]
        pieces.append(tokenizer.convert_tokens_to_string(window))
    return pieces

def process_text_with_llm(text):
    """Convert ``text`` into a list of graph documents, one per chunk.

    The text is chunked with ``scibert_chunking``; each chunk is wrapped in a
    ``Document`` and passed on its own to
    ``llm_transformer.convert_to_graph_documents``, whose first result is kept.

    Args:
        text: Raw text to analyse.

    Returns:
        A list with one graph document per chunk, in chunk order.
    """
    graph_docs = []
    for piece in scibert_chunking(text):
        wrapped = Document(page_content=piece)
        # One request per chunk; only the first returned graph document is used.
        converted = llm_transformer.convert_to_graph_documents([wrapped])
        graph_docs.append(converted[0])
    return graph_docs

def build_graph(graph_documents):
    """Merge graph documents into a single directed NetworkX graph.

    Every node is added under its ``id`` with a ``label`` attribute taken from
    the node's ``"name"`` property (falling back to the id). Every
    relationship becomes an edge from source id to target id carrying the
    relationship type in the ``type`` attribute.

    Args:
        graph_documents: Iterable of objects exposing ``nodes`` and
            ``relationships``.

    Returns:
        The populated ``nx.DiGraph``.
    """
    knowledge_graph = nx.DiGraph()
    for document in graph_documents:
        for entity in document.nodes:
            display_name = entity.properties.get("name", entity.id)
            knowledge_graph.add_node(entity.id, label=display_name)
        for relation in document.relationships:
            knowledge_graph.add_edge(
                relation.source.id,
                relation.target.id,
                type=relation.type,
            )
    return knowledge_graph

def calculate_average_top_5_pagerank(graph):
    """Compute PageRank for ``graph`` and average its five highest scores.

    Args:
        graph: Graph accepted by ``nx.pagerank``.

    Returns:
        A ``(average, pagerank)`` tuple: the mean of the (up to) five largest
        PageRank values, or ``0`` when there are no scores, and the full
        node -> score mapping.
    """
    scores = nx.pagerank(graph)
    leaders = sorted(scores.values(), reverse=True)[:5]
    if not leaders:
        # No nodes were scored: report 0 rather than dividing by zero.
        return 0, scores
    return sum(leaders) / len(leaders), scores

def draw_static_graph(graph, output_path="graph.png"):
    """Render ``graph`` with a spring layout and save it as an image.

    The drawing uses an explicit figure and axes instead of pyplot's implicit
    "current figure", and the figure is always closed, so a failure during
    layout, drawing or saving does not leak an open figure.

    Args:
        graph: NetworkX graph to draw.
        output_path: Destination file for the rendered image.
    """
    fig = plt.figure(figsize=(10, 8))
    try:
        # A regular subplot keeps margins around the drawing, leaving room
        # for the title inside the saved image.
        ax = fig.add_subplot(111)
        pos = nx.spring_layout(graph, seed=42)  # fixed seed => reproducible layout
        nx.draw(
            graph,
            pos,
            ax=ax,
            with_labels=True,
            node_size=500,
            node_color="lightblue",
            font_size=8,
            font_weight="bold",
            edge_color="gray",
        )
        ax.set_title("Static Knowledge Graph")
        fig.savefig(output_path)
    finally:
        # Release the figure even if drawing or saving raised.
        plt.close(fig)

def generate_interactive_plotly_graph(graph, pagerank):
    """Build an interactive Plotly figure of the knowledge graph.

    Nodes are placed with a seeded spring layout, annotated with their label
    and PageRank score, and coloured by that score.

    Args:
        graph: NetworkX graph; a node's ``label`` attribute is used for its
            text when present, otherwise the node id.
        pagerank: Mapping of node -> PageRank score. Nodes missing from the
            mapping are shown with a score of 0.

    Returns:
        A ``plotly.graph_objects.Figure`` with one edge trace and one node trace.
    """
    pos = nx.spring_layout(graph, seed=42)  # Generate positions for nodes

    edge_x = []
    edge_y = []
    for source, target in graph.edges():
        x0, y0 = pos[source]
        x1, y1 = pos[target]
        # The trailing None separates consecutive edges so each is drawn as
        # its own segment within the single line trace.
        edge_x.extend([x0, x1, None])
        edge_y.extend([y0, y1, None])

    edge_trace = go.Scatter(
        x=edge_x,
        y=edge_y,
        line=dict(width=0.5, color="#888"),
        hoverinfo="none",
        mode="lines",
    )

    node_x = []
    node_y = []
    node_text = []
    node_color = []
    for node in graph.nodes():
        x, y = pos[node]
        node_x.append(x)
        node_y.append(y)

        label = graph.nodes[node].get("label", str(node))  # Default to node ID if label is missing
        pagerank_score = pagerank.get(node, 0)
        node_text.append(f"{label}<br>{pagerank_score:.4f}")
        # Collect the colour in the same loop so it is always aligned with
        # the node coordinates, regardless of the ordering or completeness
        # of the ``pagerank`` mapping.
        node_color.append(pagerank_score)

    node_trace = go.Scatter(
        x=node_x,
        y=node_y,
        mode="markers+text",
        text=node_text,
        hoverinfo="text",
        marker=dict(
            showscale=True,
            colorscale="YlGnBu",
            size=10,
            color=node_color,
            colorbar=dict(
                thickness=15,
                # ``titleside`` is deprecated and rejected by newer Plotly
                # releases; the nested title form is the supported spelling.
                title=dict(text="PageRank", side="right"),
                xanchor="left",
            ),
        ),
    )

    fig = go.Figure(data=[edge_trace, node_trace])
    fig.update_layout(
        showlegend=False,
        hovermode="closest",
        margin=dict(b=0, l=0, r=0, t=0),
        xaxis=dict(showgrid=False, zeroline=False),
        yaxis=dict(showgrid=False, zeroline=False),
    )

    return fig

def classify_and_visualize_pdf(pdf_path):
    """Classify an uploaded PDF and produce its knowledge-graph visualisations.

    Pipeline: extract text -> build knowledge graph via the LLM -> compute
    PageRank -> classify by the average of the top five scores -> render a
    static image and an interactive Plotly figure.

    Args:
        pdf_path: Path of the uploaded PDF; falsy when nothing was uploaded.

    Returns:
        A ``(result_html, static_graph_path, interactive_fig)`` tuple. On any
        failure the HTML holds an error message and the other two items are
        ``None``.
    """
    if not pdf_path:
        # Button clicked without a file: give a clear message instead of
        # surfacing a low-level exception from the PDF library.
        return (
            "<p style='color:red;'><strong>Error:</strong> Please upload a PDF file.</p>",
            None,
            None,
        )

    try:
        # Step 1: Extract text from the PDF
        text = extract_text_from_pdf(pdf_path)

        # Step 2: Process text to generate a knowledge graph
        graph_documents = process_text_with_llm(text)
        graph = build_graph(graph_documents)
        if graph.number_of_nodes() == 0:
            # Without any extracted entities a classification would be
            # meaningless, so report the problem instead.
            return (
                "<p style='color:red;'><strong>Error:</strong> "
                "No entities could be extracted from the PDF.</p>",
                None,
                None,
            )

        # Step 3: Calculate PageRank and classify
        # NOTE(review): the origin of the 0.0526 threshold is not shown in this file.
        avg_top_5_pagerank, pagerank = calculate_average_top_5_pagerank(graph)
        classification = "Publishable" if avg_top_5_pagerank >= 0.0526 else "Non-Publishable"

        # Step 4: Draw the static graph and save as image
        # A unique temporary file per request keeps concurrent requests from
        # overwriting each other's image. The file is closed here (and kept,
        # delete=False) so it can be reopened for writing by the renderer.
        with tempfile.NamedTemporaryFile(
            prefix="knowledge_graph_", suffix=".png", delete=False
        ) as tmp_image:
            static_graph_path = tmp_image.name
        draw_static_graph(graph, static_graph_path)

        # Step 5: Generate the interactive Plotly graph
        interactive_fig = generate_interactive_plotly_graph(graph, pagerank)

        # Step 6: Prepare formatted result
        result_html = f"""
        <h3>Classification Result</h3>
        <p><strong>Classification:</strong> {classification}</p>
        <p><strong>Average Top 5 PageRank:</strong> {avg_top_5_pagerank:.4f}</p>
        """

        return result_html, static_graph_path, interactive_fig
    except Exception as e:
        # Top-level UI boundary: show the failure to the user. The message is
        # escaped because it may contain text derived from the uploaded file.
        return (
            f"<p style='color:red;'><strong>Error:</strong> {html.escape(str(e))}</p>",
            None,
            None,
        )

# Custom stylesheet for the Gradio page (gradient background, button,
# markdown panel, upload area and row spacing).
APP_CSS = """ 
    body {
        background: linear-gradient(to right, #6A11CB, #2575FC); 
        color: white;
        font-family: 'Poppins', sans-serif;
    }
    .gr-button {
        background: #34A853; 
        color: white; 
        border-radius: 8px;
        padding: 10px 20px;
        font-size: 16px;
        transition: background 0.3s ease;
    }
    .gr-button:hover {
        background: #2F8A43;
        transform: scale(1.05);
    }
    .gr-markdown {
        font-family: 'Poppins', sans-serif; 
        text-align: center; 
        font-size: 18px;
        padding: 10px;
        background: rgba(255, 255, 255, 0.2);
        border-radius: 10px;
    }
    .gr-file-upload {
        border: 2px dashed #fff;
        padding: 20px;
        border-radius: 10px;
        transition: border-color 0.3s ease;
    }
    .gr-file-upload:hover {
        border-color: #34A853;
    }
    .gr-row {
        padding: 10px; 
        justify-content: center; 
        align-items: center;
    }
"""

# Gradio app instance
demo = gr.Blocks(css=APP_CSS)
with demo:
    # Page header describing what the app does.
    gr.Markdown(
        """
        # 📄 Research Paper Classifier with Knowledge Graphs
        Upload a PDF research paper, and the app will:
        1. **Generate a Static Knowledge Graph**
        2. **Generate an Interactive Knowledge Graph** (hover over nodes to see details)
        3. **Calculate PageRank and classify the paper as Publishable or Non-Publishable**
        """
    )

    # Input row: PDF upload and the trigger button.
    with gr.Row():
        pdf_input = gr.File(
            label="Upload PDF",
            type="filepath",
            file_types=[".pdf"],
            elem_id="upload",
        )
        submit_btn = gr.Button("Classify Paper", elem_id="submit")

    # Output row: textual classification result.
    with gr.Row():
        result_output = gr.HTML(label="Classification Result")

    # Output row: static image next to the interactive figure.
    with gr.Row():
        static_graph_output = gr.Image(label="Static Knowledge Graph", type="filepath")
        interactive_graph_output = gr.Plot(label="Interactive Knowledge Graph")

    # Wire the button to the pipeline; the three return values map onto the
    # three output components in order.
    submit_btn.click(
        classify_and_visualize_pdf,
        pdf_input,
        [result_output, static_graph_output, interactive_graph_output],
    )

# Start the web server only when this file is executed as a script, so that
# importing the module does not launch the UI as a side effect.
if __name__ == "__main__":
    demo.launch()