delta-praticle committed on
Commit
34a0db0
·
verified ·
1 Parent(s): 4efcb08

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +253 -18
app.py CHANGED
@@ -1,3 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # import gradio as gr
2
  # import pdfplumber
3
  # import networkx as nx
@@ -17,13 +221,11 @@
17
  # llm = ChatGroq(groq_api_key=groq_api_key, model_name="Gemma2-9b-It")
18
  # llm_transformer = LLMGraphTransformer(llm=llm)
19
 
20
-
21
  # def extract_text_from_pdf(pdf_path):
22
  # with pdfplumber.open(pdf_path) as pdf:
23
  # extracted_text = "".join([page.extract_text() for page in pdf.pages])
24
  # return extracted_text
25
 
26
-
27
  # def scibert_chunking(text, chunk_size=256, max_chunks=6):
28
  # tokens = tokenizer.tokenize(text)
29
  # chunks = [
@@ -32,7 +234,6 @@
32
  # ]
33
  # return chunks
34
 
35
-
36
  # def process_text_with_llm(text):
37
  # chunks = scibert_chunking(text)
38
  # documents = [Document(page_content=chunk) for chunk in chunks]
@@ -41,7 +242,6 @@
41
  # ]
42
  # return graph_documents
43
 
44
-
45
  # def build_graph(graph_documents):
46
  # graph = nx.DiGraph()
47
  # for graph_doc in graph_documents:
@@ -52,13 +252,11 @@
52
  # graph.add_edge(rel.source.id, rel.target.id, type=rel.type)
53
  # return graph
54
 
55
-
56
  # def calculate_average_top_5_pagerank(graph):
57
  # pagerank = nx.pagerank(graph)
58
  # top_5 = sorted(pagerank.values(), reverse=True)[:5]
59
  # return sum(top_5) / len(top_5) if top_5 else 0, pagerank
60
 
61
-
62
  # def draw_static_graph(graph, output_path="graph.png"):
63
  # plt.figure(figsize=(10, 8))
64
  # pos = nx.spring_layout(graph, seed=42)
@@ -76,7 +274,6 @@
76
  # plt.savefig(output_path)
77
  # plt.close()
78
 
79
-
80
  # def generate_interactive_plotly_graph(graph, pagerank):
81
  # pos = nx.spring_layout(graph, seed=42) # Generate positions for nodes
82
 
@@ -139,7 +336,6 @@
139
 
140
  # return fig
141
 
142
-
143
  # def classify_and_visualize_pdf(pdf_path):
144
  # try:
145
  # # Step 1: Extract text from the PDF
@@ -171,9 +367,15 @@
171
  # except Exception as e:
172
  # return f"<p style='color:red;'><strong>Error:</strong> {str(e)}</p>", None, None
173
 
174
-
175
  # # Gradio app instance
176
- # with gr.Blocks() as demo:
 
 
 
 
 
 
 
177
  # gr.Markdown(
178
  # """
179
  # # 📄 Research Paper Classifier with Knowledge Graphs
@@ -185,8 +387,8 @@
185
  # )
186
 
187
  # with gr.Row():
188
- # pdf_input = gr.File(label="Upload PDF", type="filepath", file_types=[".pdf"])
189
- # submit_btn = gr.Button("Classify Paper")
190
 
191
  # with gr.Row():
192
  # result_output = gr.HTML(label="Classification Result")
@@ -369,12 +571,45 @@ def classify_and_visualize_pdf(pdf_path):
369
 
370
  # Gradio app instance
371
  with gr.Blocks(css="""
372
- body {background: linear-gradient(to right, #6A11CB, #2575FC); color: white;}
373
- .gr-button {background: #34A853; color: white; border-radius: 8px;}
374
- .gr-button:hover {background: #2F8A43;}
375
- .gr-markdown {font-family: 'Roboto', sans-serif; text-align: center;}
376
- .gr-file-upload {border: 2px dashed #fff;}
377
- .gr-row {padding: 10px; justify-content: center; align-items: center;}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
378
  """) as demo:
379
  gr.Markdown(
380
  """
 
1
+ # # import gradio as gr
2
+ # # import pdfplumber
3
+ # # import networkx as nx
4
+ # # import pandas as pd
5
+ # # import matplotlib.pyplot as plt
6
+ # # import plotly.graph_objects as go
7
+ # # from transformers import AutoTokenizer
8
+ # # from langchain_core.documents import Document
9
+ # # from langchain_experimental.graph_transformers import LLMGraphTransformer
10
+ # # from langchain_groq import ChatGroq
11
+ # # import os
12
+
13
+ # # # Initialize components
14
+ # # scibert_model = "allenai/scibert_scivocab_uncased"
15
+ # # tokenizer = AutoTokenizer.from_pretrained(scibert_model)
16
+ # # groq_api_key = "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"
17
+ # # llm = ChatGroq(groq_api_key=groq_api_key, model_name="Gemma2-9b-It")
18
+ # # llm_transformer = LLMGraphTransformer(llm=llm)
19
+
20
+
21
+ # # def extract_text_from_pdf(pdf_path):
22
+ # # with pdfplumber.open(pdf_path) as pdf:
23
+ # # extracted_text = "".join([page.extract_text() for page in pdf.pages])
24
+ # # return extracted_text
25
+
26
+
27
+ # # def scibert_chunking(text, chunk_size=256, max_chunks=6):
28
+ # # tokens = tokenizer.tokenize(text)
29
+ # # chunks = [
30
+ # # tokenizer.convert_tokens_to_string(tokens[i : i + chunk_size])
31
+ # # for i in range(0, min(len(tokens), chunk_size * max_chunks), chunk_size)
32
+ # # ]
33
+ # # return chunks
34
+
35
+
36
+ # # def process_text_with_llm(text):
37
+ # # chunks = scibert_chunking(text)
38
+ # # documents = [Document(page_content=chunk) for chunk in chunks]
39
+ # # graph_documents = [
40
+ # # llm_transformer.convert_to_graph_documents([doc])[0] for doc in documents
41
+ # # ]
42
+ # # return graph_documents
43
+
44
+
45
+ # # def build_graph(graph_documents):
46
+ # # graph = nx.DiGraph()
47
+ # # for graph_doc in graph_documents:
48
+ # # for node in graph_doc.nodes:
49
+ # # label = node.properties.get("name", node.id)
50
+ # # graph.add_node(node.id, label=label)
51
+ # # for rel in graph_doc.relationships:
52
+ # # graph.add_edge(rel.source.id, rel.target.id, type=rel.type)
53
+ # # return graph
54
+
55
+
56
+ # # def calculate_average_top_5_pagerank(graph):
57
+ # # pagerank = nx.pagerank(graph)
58
+ # # top_5 = sorted(pagerank.values(), reverse=True)[:5]
59
+ # # return sum(top_5) / len(top_5) if top_5 else 0, pagerank
60
+
61
+
62
+ # # def draw_static_graph(graph, output_path="graph.png"):
63
+ # # plt.figure(figsize=(10, 8))
64
+ # # pos = nx.spring_layout(graph, seed=42)
65
+ # # nx.draw(
66
+ # # graph,
67
+ # # pos,
68
+ # # with_labels=True,
69
+ # # node_size=500,
70
+ # # node_color="lightblue",
71
+ # # font_size=8,
72
+ # # font_weight="bold",
73
+ # # edge_color="gray",
74
+ # # )
75
+ # # plt.title("Static Knowledge Graph")
76
+ # # plt.savefig(output_path)
77
+ # # plt.close()
78
+
79
+
80
+ # # def generate_interactive_plotly_graph(graph, pagerank):
81
+ # # pos = nx.spring_layout(graph, seed=42) # Generate positions for nodes
82
+
83
+ # # edge_x = []
84
+ # # edge_y = []
85
+ # # for edge in graph.edges():
86
+ # # x0, y0 = pos[edge[0]]
87
+ # # x1, y1 = pos[edge[1]]
88
+ # # edge_x.extend([x0, x1, None])
89
+ # # edge_y.extend([y0, y1, None])
90
+
91
+ # # edge_trace = go.Scatter(
92
+ # # x=edge_x,
93
+ # # y=edge_y,
94
+ # # line=dict(width=0.5, color="#888"),
95
+ # # hoverinfo="none",
96
+ # # mode="lines",
97
+ # # )
98
+
99
+ # # node_x = []
100
+ # # node_y = []
101
+ # # node_text = []
102
+ # # for node in graph.nodes():
103
+ # # x, y = pos[node]
104
+ # # node_x.append(x)
105
+ # # node_y.append(y)
106
+
107
+ # # label = graph.nodes[node].get("label", str(node)) # Default to node ID if label is missing
108
+ # # pagerank_score = pagerank.get(node, 0)
109
+ # # node_text.append(f"{label}<br>{pagerank_score:.4f}")
110
+
111
+ # # node_trace = go.Scatter(
112
+ # # x=node_x,
113
+ # # y=node_y,
114
+ # # mode="markers+text",
115
+ # # text=node_text,
116
+ # # hoverinfo="text",
117
+ # # marker=dict(
118
+ # # showscale=True,
119
+ # # colorscale="YlGnBu",
120
+ # # size=10,
121
+ # # color=list(pagerank.values()),
122
+ # # colorbar=dict(
123
+ # # thickness=15,
124
+ # # title="PageRank",
125
+ # # xanchor="left",
126
+ # # titleside="right"
127
+ # # ),
128
+ # # ),
129
+ # # )
130
+
131
+ # # fig = go.Figure(data=[edge_trace, node_trace])
132
+ # # fig.update_layout(
133
+ # # showlegend=False,
134
+ # # hovermode="closest",
135
+ # # margin=dict(b=0, l=0, r=0, t=0),
136
+ # # xaxis=dict(showgrid=False, zeroline=False),
137
+ # # yaxis=dict(showgrid=False, zeroline=False),
138
+ # # )
139
+
140
+ # # return fig
141
+
142
+
143
+ # # def classify_and_visualize_pdf(pdf_path):
144
+ # # try:
145
+ # # # Step 1: Extract text from the PDF
146
+ # # text = extract_text_from_pdf(pdf_path)
147
+
148
+ # # # Step 2: Process text to generate a knowledge graph
149
+ # # graph_documents = process_text_with_llm(text)
150
+ # # graph = build_graph(graph_documents)
151
+
152
+ # # # Step 3: Calculate PageRank and classify
153
+ # # avg_top_5_pagerank, pagerank = calculate_average_top_5_pagerank(graph)
154
+ # # classification = "Publishable" if avg_top_5_pagerank >= 0.0526 else "Non-Publishable"
155
+
156
+ # # # Step 4: Draw the static graph and save as image
157
+ # # static_graph_path = "knowledge_graph.png"
158
+ # # draw_static_graph(graph, static_graph_path)
159
+
160
+ # # # Step 5: Generate the interactive Plotly graph
161
+ # # interactive_fig = generate_interactive_plotly_graph(graph, pagerank)
162
+
163
+ # # # Step 6: Prepare formatted result
164
+ # # result_html = f"""
165
+ # # <h3>Classification Result</h3>
166
+ # # <p><strong>Classification:</strong> {classification}</p>
167
+ # # <p><strong>Average Top 5 PageRank:</strong> {avg_top_5_pagerank:.4f}</p>
168
+ # # """
169
+
170
+ # # return result_html, static_graph_path, interactive_fig
171
+ # # except Exception as e:
172
+ # # return f"<p style='color:red;'><strong>Error:</strong> {str(e)}</p>", None, None
173
+
174
+
175
+ # # # Gradio app instance
176
+ # # with gr.Blocks() as demo:
177
+ # # gr.Markdown(
178
+ # # """
179
+ # # # 📄 Research Paper Classifier with Knowledge Graphs
180
+ # # Upload a PDF research paper, and the app will:
181
+ # # 1. **Generate a Static Knowledge Graph**
182
+ # # 2. **Generate an Interactive Knowledge Graph** (hover over nodes to see details)
183
+ # # 3. **Calculate PageRank and classify the paper as Publishable or Non-Publishable**
184
+ # # """
185
+ # # )
186
+
187
+ # # with gr.Row():
188
+ # # pdf_input = gr.File(label="Upload PDF", type="filepath", file_types=[".pdf"])
189
+ # # submit_btn = gr.Button("Classify Paper")
190
+
191
+ # # with gr.Row():
192
+ # # result_output = gr.HTML(label="Classification Result")
193
+
194
+ # # with gr.Row():
195
+ # # static_graph_output = gr.Image(label="Static Knowledge Graph", type="filepath")
196
+ # # interactive_graph_output = gr.Plot(label="Interactive Knowledge Graph")
197
+
198
+ # # submit_btn.click(
199
+ # # fn=classify_and_visualize_pdf,
200
+ # # inputs=pdf_input,
201
+ # # outputs=[result_output, static_graph_output, interactive_graph_output],
202
+ # # )
203
+
204
+ # # demo.launch()
205
  # import gradio as gr
206
  # import pdfplumber
207
  # import networkx as nx
 
221
  # llm = ChatGroq(groq_api_key=groq_api_key, model_name="Gemma2-9b-It")
222
  # llm_transformer = LLMGraphTransformer(llm=llm)
223
 
 
224
  # def extract_text_from_pdf(pdf_path):
225
  # with pdfplumber.open(pdf_path) as pdf:
226
  # extracted_text = "".join([page.extract_text() for page in pdf.pages])
227
  # return extracted_text
228
 
 
229
  # def scibert_chunking(text, chunk_size=256, max_chunks=6):
230
  # tokens = tokenizer.tokenize(text)
231
  # chunks = [
 
234
  # ]
235
  # return chunks
236
 
 
237
  # def process_text_with_llm(text):
238
  # chunks = scibert_chunking(text)
239
  # documents = [Document(page_content=chunk) for chunk in chunks]
 
242
  # ]
243
  # return graph_documents
244
 
 
245
  # def build_graph(graph_documents):
246
  # graph = nx.DiGraph()
247
  # for graph_doc in graph_documents:
 
252
  # graph.add_edge(rel.source.id, rel.target.id, type=rel.type)
253
  # return graph
254
 
 
255
  # def calculate_average_top_5_pagerank(graph):
256
  # pagerank = nx.pagerank(graph)
257
  # top_5 = sorted(pagerank.values(), reverse=True)[:5]
258
  # return sum(top_5) / len(top_5) if top_5 else 0, pagerank
259
 
 
260
  # def draw_static_graph(graph, output_path="graph.png"):
261
  # plt.figure(figsize=(10, 8))
262
  # pos = nx.spring_layout(graph, seed=42)
 
274
  # plt.savefig(output_path)
275
  # plt.close()
276
 
 
277
  # def generate_interactive_plotly_graph(graph, pagerank):
278
  # pos = nx.spring_layout(graph, seed=42) # Generate positions for nodes
279
 
 
336
 
337
  # return fig
338
 
 
339
  # def classify_and_visualize_pdf(pdf_path):
340
  # try:
341
  # # Step 1: Extract text from the PDF
 
367
  # except Exception as e:
368
  # return f"<p style='color:red;'><strong>Error:</strong> {str(e)}</p>", None, None
369
 
 
370
  # # Gradio app instance
371
+ # with gr.Blocks(css="""
372
+ # body {background: linear-gradient(to right, #6A11CB, #2575FC); color: white;}
373
+ # .gr-button {background: #34A853; color: white; border-radius: 8px;}
374
+ # .gr-button:hover {background: #2F8A43;}
375
+ # .gr-markdown {font-family: 'Roboto', sans-serif; text-align: center;}
376
+ # .gr-file-upload {border: 2px dashed #fff;}
377
+ # .gr-row {padding: 10px; justify-content: center; align-items: center;}
378
+ # """) as demo:
379
  # gr.Markdown(
380
  # """
381
  # # 📄 Research Paper Classifier with Knowledge Graphs
 
387
  # )
388
 
389
  # with gr.Row():
390
+ # pdf_input = gr.File(label="Upload PDF", type="filepath", file_types=[".pdf"], elem_id="upload")
391
+ # submit_btn = gr.Button("Classify Paper", elem_id="submit")
392
 
393
  # with gr.Row():
394
  # result_output = gr.HTML(label="Classification Result")
 
571
 
572
  # Gradio app instance
573
  with gr.Blocks(css="""
574
+ body {
575
+ background: linear-gradient(to right, #6A11CB, #2575FC);
576
+ color: white;
577
+ font-family: 'Poppins', sans-serif;
578
+ }
579
+ .gr-button {
580
+ background: #34A853;
581
+ color: white;
582
+ border-radius: 8px;
583
+ padding: 10px 20px;
584
+ font-size: 16px;
585
+ transition: background 0.3s ease;
586
+ }
587
+ .gr-button:hover {
588
+ background: #2F8A43;
589
+ transform: scale(1.05);
590
+ }
591
+ .gr-markdown {
592
+ font-family: 'Poppins', sans-serif;
593
+ text-align: center;
594
+ font-size: 18px;
595
+ padding: 10px;
596
+ background: rgba(255, 255, 255, 0.2);
597
+ border-radius: 10px;
598
+ }
599
+ .gr-file-upload {
600
+ border: 2px dashed #fff;
601
+ padding: 20px;
602
+ border-radius: 10px;
603
+ transition: border-color 0.3s ease;
604
+ }
605
+ .gr-file-upload:hover {
606
+ border-color: #34A853;
607
+ }
608
+ .gr-row {
609
+ padding: 10px;
610
+ justify-content: center;
611
+ align-items: center;
612
+ }
613
  """) as demo:
614
  gr.Markdown(
615
  """