Update app.py
app.py
CHANGED
@@ -1,3 +1,207 @@
+# # import gradio as gr
+# # import pdfplumber
+# # import networkx as nx
+# # import pandas as pd
+# # import matplotlib.pyplot as plt
+# # import plotly.graph_objects as go
+# # from transformers import AutoTokenizer
+# # from langchain_core.documents import Document
+# # from langchain_experimental.graph_transformers import LLMGraphTransformer
+# # from langchain_groq import ChatGroq
+# # import os
+
+# # # Initialize components
+# # scibert_model = "allenai/scibert_scivocab_uncased"
+# # tokenizer = AutoTokenizer.from_pretrained(scibert_model)
+# # groq_api_key = "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"
+# # llm = ChatGroq(groq_api_key=groq_api_key, model_name="Gemma2-9b-It")
+# # llm_transformer = LLMGraphTransformer(llm=llm)
+
+
+# # def extract_text_from_pdf(pdf_path):
+# #     with pdfplumber.open(pdf_path) as pdf:
+# #         extracted_text = "".join([page.extract_text() for page in pdf.pages])
+# #     return extracted_text
+
+
+# # def scibert_chunking(text, chunk_size=256, max_chunks=6):
+# #     tokens = tokenizer.tokenize(text)
+# #     chunks = [
+# #         tokenizer.convert_tokens_to_string(tokens[i : i + chunk_size])
+# #         for i in range(0, min(len(tokens), chunk_size * max_chunks), chunk_size)
+# #     ]
+# #     return chunks
+
+
+# # def process_text_with_llm(text):
+# #     chunks = scibert_chunking(text)
+# #     documents = [Document(page_content=chunk) for chunk in chunks]
+# #     graph_documents = [
+# #         llm_transformer.convert_to_graph_documents([doc])[0] for doc in documents
+# #     ]
+# #     return graph_documents
+
+
+# # def build_graph(graph_documents):
+# #     graph = nx.DiGraph()
+# #     for graph_doc in graph_documents:
+# #         for node in graph_doc.nodes:
+# #             label = node.properties.get("name", node.id)
+# #             graph.add_node(node.id, label=label)
+# #         for rel in graph_doc.relationships:
+# #             graph.add_edge(rel.source.id, rel.target.id, type=rel.type)
+# #     return graph
+
+
+# # def calculate_average_top_5_pagerank(graph):
+# #     pagerank = nx.pagerank(graph)
+# #     top_5 = sorted(pagerank.values(), reverse=True)[:5]
+# #     return sum(top_5) / len(top_5) if top_5 else 0, pagerank
+
+
+# # def draw_static_graph(graph, output_path="graph.png"):
+# #     plt.figure(figsize=(10, 8))
+# #     pos = nx.spring_layout(graph, seed=42)
+# #     nx.draw(
+# #         graph,
+# #         pos,
+# #         with_labels=True,
+# #         node_size=500,
+# #         node_color="lightblue",
+# #         font_size=8,
+# #         font_weight="bold",
+# #         edge_color="gray",
+# #     )
+# #     plt.title("Static Knowledge Graph")
+# #     plt.savefig(output_path)
+# #     plt.close()
+
+
+# # def generate_interactive_plotly_graph(graph, pagerank):
+# #     pos = nx.spring_layout(graph, seed=42)  # Generate positions for nodes
+
+# #     edge_x = []
+# #     edge_y = []
+# #     for edge in graph.edges():
+# #         x0, y0 = pos[edge[0]]
+# #         x1, y1 = pos[edge[1]]
+# #         edge_x.extend([x0, x1, None])
+# #         edge_y.extend([y0, y1, None])
+
+# #     edge_trace = go.Scatter(
+# #         x=edge_x,
+# #         y=edge_y,
+# #         line=dict(width=0.5, color="#888"),
+# #         hoverinfo="none",
+# #         mode="lines",
+# #     )
+
+# #     node_x = []
+# #     node_y = []
+# #     node_text = []
+# #     for node in graph.nodes():
+# #         x, y = pos[node]
+# #         node_x.append(x)
+# #         node_y.append(y)
+
+# #         label = graph.nodes[node].get("label", str(node))  # Default to node ID if label is missing
+# #         pagerank_score = pagerank.get(node, 0)
+# #         node_text.append(f"{label}<br>{pagerank_score:.4f}")
+
+# #     node_trace = go.Scatter(
+# #         x=node_x,
+# #         y=node_y,
+# #         mode="markers+text",
+# #         text=node_text,
+# #         hoverinfo="text",
+# #         marker=dict(
+# #             showscale=True,
+# #             colorscale="YlGnBu",
+# #             size=10,
+# #             color=list(pagerank.values()),
+# #             colorbar=dict(
+# #                 thickness=15,
+# #                 title="PageRank",
+# #                 xanchor="left",
+# #                 titleside="right"
+# #             ),
+# #         ),
+# #     )
+
+# #     fig = go.Figure(data=[edge_trace, node_trace])
+# #     fig.update_layout(
+# #         showlegend=False,
+# #         hovermode="closest",
+# #         margin=dict(b=0, l=0, r=0, t=0),
+# #         xaxis=dict(showgrid=False, zeroline=False),
+# #         yaxis=dict(showgrid=False, zeroline=False),
+# #     )
+
+# #     return fig
+
+
+# # def classify_and_visualize_pdf(pdf_path):
+# #     try:
+# #         # Step 1: Extract text from the PDF
+# #         text = extract_text_from_pdf(pdf_path)
+
+# #         # Step 2: Process text to generate a knowledge graph
+# #         graph_documents = process_text_with_llm(text)
+# #         graph = build_graph(graph_documents)
+
+# #         # Step 3: Calculate PageRank and classify
+# #         avg_top_5_pagerank, pagerank = calculate_average_top_5_pagerank(graph)
+# #         classification = "Publishable" if avg_top_5_pagerank >= 0.0526 else "Non-Publishable"
+
+# #         # Step 4: Draw the static graph and save as image
+# #         static_graph_path = "knowledge_graph.png"
+# #         draw_static_graph(graph, static_graph_path)
+
+# #         # Step 5: Generate the interactive Plotly graph
+# #         interactive_fig = generate_interactive_plotly_graph(graph, pagerank)
+
+# #         # Step 6: Prepare formatted result
+# #         result_html = f"""
+# #         <h3>Classification Result</h3>
+# #         <p><strong>Classification:</strong> {classification}</p>
+# #         <p><strong>Average Top 5 PageRank:</strong> {avg_top_5_pagerank:.4f}</p>
+# #         """
+
+# #         return result_html, static_graph_path, interactive_fig
+# #     except Exception as e:
+# #         return f"<p style='color:red;'><strong>Error:</strong> {str(e)}</p>", None, None
+
+
+# # # Gradio app instance
+# # with gr.Blocks() as demo:
+# #     gr.Markdown(
+# #         """
+# #         # π Research Paper Classifier with Knowledge Graphs
+# #         Upload a PDF research paper, and the app will:
+# #         1. **Generate a Static Knowledge Graph**
+# #         2. **Generate an Interactive Knowledge Graph** (hover over nodes to see details)
+# #         3. **Calculate PageRank and classify the paper as Publishable or Non-Publishable**
+# #         """
+# #     )
+
+# #     with gr.Row():
+# #         pdf_input = gr.File(label="Upload PDF", type="filepath", file_types=[".pdf"])
+# #         submit_btn = gr.Button("Classify Paper")
+
+# #     with gr.Row():
+# #         result_output = gr.HTML(label="Classification Result")
+
+# #     with gr.Row():
+# #         static_graph_output = gr.Image(label="Static Knowledge Graph", type="filepath")
+# #         interactive_graph_output = gr.Plot(label="Interactive Knowledge Graph")
+
+# #     submit_btn.click(
+# #         fn=classify_and_visualize_pdf,
+# #         inputs=pdf_input,
+# #         outputs=[result_output, static_graph_output, interactive_graph_output],
+# #     )
+
+# # demo.launch()
 # import gradio as gr
 # import pdfplumber
 # import networkx as nx
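A note on the initialization block in the hunk above: it commits a literal Groq API key to source, and a key published in a Space like this should be treated as leaked and rotated. A minimal sketch of reading it from the environment instead (the variable name GROQ_API_KEY is an assumption, not part of this commit; on Spaces it would be set as a repository secret):

import os
from langchain_groq import ChatGroq

# assumed env var name; set outside the repo, e.g. as a Space secret
groq_api_key = os.environ.get("GROQ_API_KEY")
llm = ChatGroq(groq_api_key=groq_api_key, model_name="Gemma2-9b-It")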
@@ -17,13 +221,11 @@
 # llm = ChatGroq(groq_api_key=groq_api_key, model_name="Gemma2-9b-It")
 # llm_transformer = LLMGraphTransformer(llm=llm)
 
-
 # def extract_text_from_pdf(pdf_path):
 #     with pdfplumber.open(pdf_path) as pdf:
 #         extracted_text = "".join([page.extract_text() for page in pdf.pages])
 #     return extracted_text
 
-
 # def scibert_chunking(text, chunk_size=256, max_chunks=6):
 #     tokens = tokenizer.tokenize(text)
 #     chunks = [
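One caveat with extract_text_from_pdf as written: pdfplumber's page.extract_text() can return None for pages with no extractable text (scanned or image-only pages, depending on the pdfplumber version), which would make the "".join(...) raise a TypeError. A defensive variant, as a sketch rather than a change this commit makes:

import pdfplumber

def extract_text_from_pdf(pdf_path):
    with pdfplumber.open(pdf_path) as pdf:
        # fall back to "" so image-only pages do not break the join
        return "".join(page.extract_text() or "" for page in pdf.pages)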
@@ -32,7 +234,6 @@
 #     ]
 #     return chunks
 
-
 # def process_text_with_llm(text):
 #     chunks = scibert_chunking(text)
 #     documents = [Document(page_content=chunk) for chunk in chunks]
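For reference, scibert_chunking caps the text it keeps at chunk_size * max_chunks = 256 * 6 = 1536 tokens; anything past that is silently dropped. A worked example of the window arithmetic (illustrative token counts only):

chunk_size, max_chunks = 256, 6
for n_tokens in (100, 1000, 5000):
    starts = list(range(0, min(n_tokens, chunk_size * max_chunks), chunk_size))
    print(n_tokens, starts)
# 100  -> [0]                   one short chunk
# 1000 -> [0, 256, 512, 768]    four chunks, the last only 232 tokens long
# 5000 -> [0, 256, ..., 1280]   capped at six chunks; tokens from 1536 on are discarded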
@@ -41,7 +242,6 @@
 #     ]
 #     return graph_documents
 
-
 # def build_graph(graph_documents):
 #     graph = nx.DiGraph()
 #     for graph_doc in graph_documents:
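process_text_with_llm calls the transformer once per single-document list and unwraps the one-element result. LLMGraphTransformer.convert_to_graph_documents also accepts the whole list of documents, so the comprehension could likely be collapsed to one call (a sketch; equivalence of the outputs is an assumption):

graph_documents = llm_transformer.convert_to_graph_documents(documents)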
@@ -52,13 +252,11 @@
 #         graph.add_edge(rel.source.id, rel.target.id, type=rel.type)
 #     return graph
 
-
 # def calculate_average_top_5_pagerank(graph):
 #     pagerank = nx.pagerank(graph)
 #     top_5 = sorted(pagerank.values(), reverse=True)[:5]
 #     return sum(top_5) / len(top_5) if top_5 else 0, pagerank
 
-
 # def draw_static_graph(graph, output_path="graph.png"):
 #     plt.figure(figsize=(10, 8))
 #     pos = nx.spring_layout(graph, seed=42)
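On the classification rule this feeds: PageRank scores over a graph sum to 1, and the mean of the top five scores is at least the overall mean 1/n, so any graph with at most 19 nodes clears the 0.0526 cutoff (roughly 1/19) automatically; the threshold only discriminates once the extracted graph is reasonably large. A toy check of the same computation (networkx only; the graph here is made up):

import networkx as nx

g = nx.DiGraph()
g.add_edges_from([("a", "b"), ("b", "c"), ("c", "a"), ("d", "a")])
pagerank = nx.pagerank(g)
top_5 = sorted(pagerank.values(), reverse=True)[:5]
avg = sum(top_5) / len(top_5) if top_5 else 0
print(avg, "Publishable" if avg >= 0.0526 else "Non-Publishable")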
@@ -76,7 +274,6 @@
 #     plt.savefig(output_path)
 #     plt.close()
 
-
 # def generate_interactive_plotly_graph(graph, pagerank):
 #     pos = nx.spring_layout(graph, seed=42)  # Generate positions for nodes
 
@@ -139,7 +336,6 @@
 
 #     return fig
 
-
 # def classify_and_visualize_pdf(pdf_path):
 #     try:
 #         # Step 1: Extract text from the PDF
@@ -171,9 +367,15 @@
 #     except Exception as e:
 #         return f"<p style='color:red;'><strong>Error:</strong> {str(e)}</p>", None, None
 
-
 # # Gradio app instance
-# with gr.Blocks(
+# with gr.Blocks(css="""
+# body {background: linear-gradient(to right, #6A11CB, #2575FC); color: white;}
+# .gr-button {background: #34A853; color: white; border-radius: 8px;}
+# .gr-button:hover {background: #2F8A43;}
+# .gr-markdown {font-family: 'Roboto', sans-serif; text-align: center;}
+# .gr-file-upload {border: 2px dashed #fff;}
+# .gr-row {padding: 10px; justify-content: center; align-items: center;}
+# """) as demo:
 #     gr.Markdown(
 #         """
 #         # π Research Paper Classifier with Knowledge Graphs
@@ -185,8 +387,8 @@
 #     )
 
 #     with gr.Row():
-#         pdf_input = gr.File(label="Upload PDF", type="filepath", file_types=[".pdf"])
-#         submit_btn = gr.Button("Classify Paper")
+#         pdf_input = gr.File(label="Upload PDF", type="filepath", file_types=[".pdf"], elem_id="upload")
+#         submit_btn = gr.Button("Classify Paper", elem_id="submit")
 
 #     with gr.Row():
 #         result_output = gr.HTML(label="Classification Result")
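The two added lines above attach elem_id values, which Gradio emits as the HTML id of the rendered component, so the css string passed to gr.Blocks can target those components directly. A minimal sketch of that hookup (the selector bodies are illustrative; only elem_id="upload" and elem_id="submit" come from the diff):

import gradio as gr

css = """
#upload {border: 2px dashed #fff;}
#submit {background: #34A853; color: white;}
"""

with gr.Blocks(css=css) as demo:
    pdf_input = gr.File(label="Upload PDF", type="filepath", file_types=[".pdf"], elem_id="upload")
    submit_btn = gr.Button("Classify Paper", elem_id="submit")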
@@ -369,12 +571,45 @@ def classify_and_visualize_pdf(pdf_path):
 
 # Gradio app instance
 with gr.Blocks(css="""
-body {background: linear-gradient(to right, #6A11CB, #2575FC); color: white;}
-.gr-button {background: #34A853; color: white; border-radius: 8px;}
-.gr-button:hover {background: #2F8A43;}
-.gr-markdown {font-family: 'Roboto', sans-serif; text-align: center;}
-.gr-file-upload {border: 2px dashed #fff;}
-.gr-row {padding: 10px; justify-content: center; align-items: center;}
+body {
+    background: linear-gradient(to right, #6A11CB, #2575FC);
+    color: white;
+    font-family: 'Poppins', sans-serif;
+}
+.gr-button {
+    background: #34A853;
+    color: white;
+    border-radius: 8px;
+    padding: 10px 20px;
+    font-size: 16px;
+    transition: background 0.3s ease;
+}
+.gr-button:hover {
+    background: #2F8A43;
+    transform: scale(1.05);
+}
+.gr-markdown {
+    font-family: 'Poppins', sans-serif;
+    text-align: center;
+    font-size: 18px;
+    padding: 10px;
+    background: rgba(255, 255, 255, 0.2);
+    border-radius: 10px;
+}
+.gr-file-upload {
+    border: 2px dashed #fff;
+    padding: 20px;
+    border-radius: 10px;
+    transition: border-color 0.3s ease;
+}
+.gr-file-upload:hover {
+    border-color: #34A853;
+}
+.gr-row {
+    padding: 10px;
+    justify-content: center;
+    align-items: center;
+}
 """) as demo:
     gr.Markdown(
         """
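Taken together, the commit comments out the previous compact theme (hunks at -171 and -185 above) and replaces the active css with the expanded Poppins-styled version, leaving the UI wiring unchanged. A condensed, runnable skeleton of that wiring, with the pipeline stubbed out (everything except the stub body mirrors the diff; note that class selectors like .gr-button depend on Gradio's internal DOM and may not match across major Gradio versions):

import gradio as gr

def classify_and_visualize_pdf(pdf_path):
    # stub standing in for the extract -> graph -> PageRank -> figures pipeline
    return "<p>result</p>", None, None

with gr.Blocks(css=".gr-button {background: #34A853; color: white;}") as demo:
    with gr.Row():
        pdf_input = gr.File(label="Upload PDF", type="filepath", file_types=[".pdf"])
        submit_btn = gr.Button("Classify Paper")
    with gr.Row():
        result_output = gr.HTML(label="Classification Result")
    with gr.Row():
        static_graph_output = gr.Image(label="Static Knowledge Graph", type="filepath")
        interactive_graph_output = gr.Plot(label="Interactive Knowledge Graph")
    submit_btn.click(
        fn=classify_and_visualize_pdf,
        inputs=pdf_input,
        outputs=[result_output, static_graph_output, interactive_graph_output],
    )

demo.launch()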