Update app.py
app.py
CHANGED
@@ -1,3 +1,207 @@
+# import gradio as gr
+# import pdfplumber
+# import networkx as nx
+# import pandas as pd
+# import matplotlib.pyplot as plt
+# import plotly.graph_objects as go
+# from transformers import AutoTokenizer
+# from langchain_core.documents import Document
+# from langchain_experimental.graph_transformers import LLMGraphTransformer
+# from langchain_groq import ChatGroq
+# import os
+
+# # Initialize components
+# scibert_model = "allenai/scibert_scivocab_uncased"
+# tokenizer = AutoTokenizer.from_pretrained(scibert_model)
+# groq_api_key = "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"
+# llm = ChatGroq(groq_api_key=groq_api_key, model_name="Gemma2-9b-It")
+# llm_transformer = LLMGraphTransformer(llm=llm)
+
+
+# def extract_text_from_pdf(pdf_path):
+#     with pdfplumber.open(pdf_path) as pdf:
+#         extracted_text = "".join([page.extract_text() for page in pdf.pages])
+#     return extracted_text
+
+
+# def scibert_chunking(text, chunk_size=256, max_chunks=6):
+#     tokens = tokenizer.tokenize(text)
+#     chunks = [
+#         tokenizer.convert_tokens_to_string(tokens[i : i + chunk_size])
+#         for i in range(0, min(len(tokens), chunk_size * max_chunks), chunk_size)
+#     ]
+#     return chunks
+
+
+# def process_text_with_llm(text):
+#     chunks = scibert_chunking(text)
+#     documents = [Document(page_content=chunk) for chunk in chunks]
+#     graph_documents = [
+#         llm_transformer.convert_to_graph_documents([doc])[0] for doc in documents
+#     ]
+#     return graph_documents
+
+
+# def build_graph(graph_documents):
+#     graph = nx.DiGraph()
+#     for graph_doc in graph_documents:
+#         for node in graph_doc.nodes:
+#             label = node.properties.get("name", node.id)
+#             graph.add_node(node.id, label=label)
+#         for rel in graph_doc.relationships:
+#             graph.add_edge(rel.source.id, rel.target.id, type=rel.type)
+#     return graph
+
+
+# def calculate_average_top_5_pagerank(graph):
+#     pagerank = nx.pagerank(graph)
+#     top_5 = sorted(pagerank.values(), reverse=True)[:5]
+#     return sum(top_5) / len(top_5) if top_5 else 0, pagerank
+
+
+# def draw_static_graph(graph, output_path="graph.png"):
+#     plt.figure(figsize=(10, 8))
+#     pos = nx.spring_layout(graph, seed=42)
+#     nx.draw(
+#         graph,
+#         pos,
+#         with_labels=True,
+#         node_size=500,
+#         node_color="lightblue",
+#         font_size=8,
+#         font_weight="bold",
+#         edge_color="gray",
+#     )
+#     plt.title("Static Knowledge Graph")
+#     plt.savefig(output_path)
+#     plt.close()
+
+
+# def generate_interactive_plotly_graph(graph, pagerank):
+#     pos = nx.spring_layout(graph, seed=42)  # Generate positions for nodes
+
+#     edge_x = []
+#     edge_y = []
+#     for edge in graph.edges():
+#         x0, y0 = pos[edge[0]]
+#         x1, y1 = pos[edge[1]]
+#         edge_x.extend([x0, x1, None])
+#         edge_y.extend([y0, y1, None])
+
+#     edge_trace = go.Scatter(
+#         x=edge_x,
+#         y=edge_y,
+#         line=dict(width=0.5, color="#888"),
+#         hoverinfo="none",
+#         mode="lines",
+#     )
+
+#     node_x = []
+#     node_y = []
+#     node_text = []
+#     for node in graph.nodes():
+#         x, y = pos[node]
+#         node_x.append(x)
+#         node_y.append(y)
+
+#         label = graph.nodes[node].get("label", str(node))  # Default to node ID if label is missing
+#         pagerank_score = pagerank.get(node, 0)
+#         node_text.append(f"{label}<br>{pagerank_score:.4f}")
+
+#     node_trace = go.Scatter(
+#         x=node_x,
+#         y=node_y,
+#         mode="markers+text",
+#         text=node_text,
+#         hoverinfo="text",
+#         marker=dict(
+#             showscale=True,
+#             colorscale="YlGnBu",
+#             size=10,
+#             color=list(pagerank.values()),
+#             colorbar=dict(
+#                 thickness=15,
+#                 title="PageRank",
+#                 xanchor="left",
+#                 titleside="right"
+#             ),
+#         ),
+#     )
+
+#     fig = go.Figure(data=[edge_trace, node_trace])
+#     fig.update_layout(
+#         showlegend=False,
+#         hovermode="closest",
+#         margin=dict(b=0, l=0, r=0, t=0),
+#         xaxis=dict(showgrid=False, zeroline=False),
+#         yaxis=dict(showgrid=False, zeroline=False),
+#     )
+
+#     return fig
+
+
+# def classify_and_visualize_pdf(pdf_path):
+#     try:
+#         # Step 1: Extract text from the PDF
+#         text = extract_text_from_pdf(pdf_path)
+
+#         # Step 2: Process text to generate a knowledge graph
+#         graph_documents = process_text_with_llm(text)
+#         graph = build_graph(graph_documents)
+
+#         # Step 3: Calculate PageRank and classify
+#         avg_top_5_pagerank, pagerank = calculate_average_top_5_pagerank(graph)
+#         classification = "Publishable" if avg_top_5_pagerank >= 0.0526 else "Non-Publishable"
+
+#         # Step 4: Draw the static graph and save as image
+#         static_graph_path = "knowledge_graph.png"
+#         draw_static_graph(graph, static_graph_path)
+
+#         # Step 5: Generate the interactive Plotly graph
+#         interactive_fig = generate_interactive_plotly_graph(graph, pagerank)
+
+#         # Step 6: Prepare formatted result
+#         result_html = f"""
+#         <h3>Classification Result</h3>
+#         <p><strong>Classification:</strong> {classification}</p>
+#         <p><strong>Average Top 5 PageRank:</strong> {avg_top_5_pagerank:.4f}</p>
+#         """
+
+#         return result_html, static_graph_path, interactive_fig
+#     except Exception as e:
+#         return f"<p style='color:red;'><strong>Error:</strong> {str(e)}</p>", None, None
+
+
+# # Gradio app instance
+# with gr.Blocks() as demo:
+#     gr.Markdown(
+#         """
+#         # π Research Paper Classifier with Knowledge Graphs
+#         Upload a PDF research paper, and the app will:
+#         1. **Generate a Static Knowledge Graph**
+#         2. **Generate an Interactive Knowledge Graph** (hover over nodes to see details)
+#         3. **Calculate PageRank and classify the paper as Publishable or Non-Publishable**
+#         """
+#     )
+
+#     with gr.Row():
+#         pdf_input = gr.File(label="Upload PDF", type="filepath", file_types=[".pdf"])
+#         submit_btn = gr.Button("Classify Paper")
+
+#     with gr.Row():
+#         result_output = gr.HTML(label="Classification Result")
+
+#     with gr.Row():
+#         static_graph_output = gr.Image(label="Static Knowledge Graph", type="filepath")
+#         interactive_graph_output = gr.Plot(label="Interactive Knowledge Graph")
+
+#     submit_btn.click(
+#         fn=classify_and_visualize_pdf,
+#         inputs=pdf_input,
+#         outputs=[result_output, static_graph_output, interactive_graph_output],
+#     )
+
+# demo.launch()
 import gradio as gr
 import pdfplumber
 import networkx as nx
@@ -17,13 +221,11 @@ groq_api_key = "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"
 llm = ChatGroq(groq_api_key=groq_api_key, model_name="Gemma2-9b-It")
 llm_transformer = LLMGraphTransformer(llm=llm)
 
-
 def extract_text_from_pdf(pdf_path):
     with pdfplumber.open(pdf_path) as pdf:
         extracted_text = "".join([page.extract_text() for page in pdf.pages])
     return extracted_text
 
-
 def scibert_chunking(text, chunk_size=256, max_chunks=6):
     tokens = tokenizer.tokenize(text)
     chunks = [
@@ -32,7 +234,6 @@ def scibert_chunking(text, chunk_size=256, max_chunks=6):
     ]
     return chunks
 
-
 def process_text_with_llm(text):
     chunks = scibert_chunking(text)
     documents = [Document(page_content=chunk) for chunk in chunks]
@@ -41,7 +242,6 @@ def process_text_with_llm(text):
     ]
     return graph_documents
 
-
 def build_graph(graph_documents):
     graph = nx.DiGraph()
     for graph_doc in graph_documents:
@@ -52,13 +252,11 @@ def build_graph(graph_documents):
             graph.add_edge(rel.source.id, rel.target.id, type=rel.type)
     return graph
 
-
 def calculate_average_top_5_pagerank(graph):
     pagerank = nx.pagerank(graph)
     top_5 = sorted(pagerank.values(), reverse=True)[:5]
     return sum(top_5) / len(top_5) if top_5 else 0, pagerank
 
-
 def draw_static_graph(graph, output_path="graph.png"):
     plt.figure(figsize=(10, 8))
     pos = nx.spring_layout(graph, seed=42)
@@ -76,7 +274,6 @@ def draw_static_graph(graph, output_path="graph.png"):
     plt.savefig(output_path)
     plt.close()
 
-
 def generate_interactive_plotly_graph(graph, pagerank):
     pos = nx.spring_layout(graph, seed=42)  # Generate positions for nodes
 
@@ -139,7 +336,6 @@ def generate_interactive_plotly_graph(graph, pagerank):
 
     return fig
 
-
 def classify_and_visualize_pdf(pdf_path):
     try:
         # Step 1: Extract text from the PDF
@@ -171,9 +367,15 @@ def classify_and_visualize_pdf(pdf_path):
     except Exception as e:
         return f"<p style='color:red;'><strong>Error:</strong> {str(e)}</p>", None, None
 
-
 # Gradio app instance
-with gr.Blocks() as demo:
+with gr.Blocks(css="""
+    body {background: linear-gradient(to right, #6A11CB, #2575FC); color: white;}
+    .gr-button {background: #34A853; color: white; border-radius: 8px;}
+    .gr-button:hover {background: #2F8A43;}
+    .gr-markdown {font-family: 'Roboto', sans-serif; text-align: center;}
+    .gr-file-upload {border: 2px dashed #fff;}
+    .gr-row {padding: 10px; justify-content: center; align-items: center;}
+""") as demo:
     gr.Markdown(
         """
         # π Research Paper Classifier with Knowledge Graphs
@@ -185,8 +387,8 @@ with gr.Blocks() as demo:
     )
 
     with gr.Row():
-        pdf_input = gr.File(label="Upload PDF", type="filepath", file_types=[".pdf"])
-        submit_btn = gr.Button("Classify Paper")
+        pdf_input = gr.File(label="Upload PDF", type="filepath", file_types=[".pdf"], elem_id="upload")
+        submit_btn = gr.Button("Classify Paper", elem_id="submit")
 
     with gr.Row():
         result_output = gr.HTML(label="Classification Result")
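
A few notes on this change. First, the diff leaves a live Groq API key hardcoded in app.py (it is even visible in a hunk header above). A key committed to a public Space should be treated as compromised and rotated; the usual pattern is to read it from the environment instead. A minimal sketch, assuming a GROQ_API_KEY repository secret is configured on the Space (the variable name is illustrative, not part of this commit):

    import os

    # Hypothetical replacement for the hardcoded key: read it from the
    # environment (on Spaces, set GROQ_API_KEY as a repository secret).
    groq_api_key = os.environ.get("GROQ_API_KEY")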
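
Second, extract_text_from_pdf (unchanged by this commit) joins page.extract_text() results directly, but pdfplumber returns None for pages with no extractable text (e.g. scanned images), which would make "".join(...) raise a TypeError. A None-safe variant of the same function might look like:

    import pdfplumber

    def extract_text_from_pdf(pdf_path):
        # page.extract_text() returns None for pages with no text layer
        # (e.g. image-only pages); fall back to "" so join() cannot fail.
        with pdfplumber.open(pdf_path) as pdf:
            return "".join(page.extract_text() or "" for page in pdf.pages)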
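
Third, for context on the classification rule: calculate_average_top_5_pagerank averages the five largest PageRank scores, and classify_and_visualize_pdf labels the paper "Publishable" when that average is at least 0.0526. PageRank scores sum to 1 over the graph and 0.0526 is roughly 1/19, so the rule effectively asks whether centrality is at least as concentrated as in a uniform 19-node graph; any graph with five or fewer nodes clears the threshold automatically. A self-contained sketch of the commit's own computation on a toy graph (the threshold constant is the commit's):

    import networkx as nx

    # Toy stand-in for an extracted knowledge graph.
    graph = nx.DiGraph([("paper", "model"), ("model", "dataset"), ("model", "metric")])

    pagerank = nx.pagerank(graph)  # scores over all nodes sum to 1.0
    top_5 = sorted(pagerank.values(), reverse=True)[:5]
    avg_top_5 = sum(top_5) / len(top_5) if top_5 else 0
    print("Publishable" if avg_top_5 >= 0.0526 else "Non-Publishable")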
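
Finally, the Gradio changes pair css= on gr.Blocks with elem_id hooks on the file input and button, yet the stylesheet still targets .gr-* class names, which are not stable across Gradio versions. Styling through the ids this commit already assigns is the more robust route. An illustrative sketch (the #upload/#submit ids come from this commit; the CSS rules themselves are placeholders, not the commit's styling):

    import gradio as gr

    # Target the commit's elem_ids instead of version-dependent .gr-* classes.
    css = """
    #submit {background: #34A853; color: white; border-radius: 8px;}
    #upload {border: 2px dashed #fff;}
    """

    with gr.Blocks(css=css) as demo:
        pdf_input = gr.File(label="Upload PDF", type="filepath", file_types=[".pdf"], elem_id="upload")
        submit_btn = gr.Button("Classify Paper", elem_id="submit")

    demo.launch()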