Shreyas094 committed on
Commit
d06c0f4
1 Parent(s): 05ba9f1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -57
app.py CHANGED
@@ -3,70 +3,71 @@ import gradio as gr
3
  from PyPDF2 import PdfReader
4
  import requests
5
  from dotenv import load_dotenv
6
-
7
  # Load environment variables
8
  load_dotenv()
9
-
10
  # Get the Hugging Face API token
11
  HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
12
-
13
- def summarize_text(text, instructions):
14
- API_URL = "https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-3-8B-Instruct"
15
- headers = {"Authorization": f"Bearer {HUGGINGFACE_TOKEN}"}
16
-
17
- payload = {
18
- "inputs": f"{instructions}\n\nText to summarize:\n{text}",
19
- "parameters": {"max_length": 500}
20
- }
21
-
22
- response = requests.post(API_URL, headers=headers, json=payload)
23
- return response.json()[0]["generated_text"]
24
-
25
  def process_pdf(pdf_file, chunk_instructions, final_instructions):
26
- # Read PDF
27
- reader = PdfReader(pdf_file)
28
- text = ""
29
- for page in reader.pages:
30
- text += page.extract_text() + "\n\n"
31
-
32
- # Chunk the text (simple splitting by pages for this example)
33
- chunks = text.split("\n\n")
34
-
35
- # Agent 1: Summarize each chunk
36
- agent1_summaries = []
37
- for chunk in chunks:
38
- summary = summarize_text(chunk, chunk_instructions)
39
- agent1_summaries.append(summary)
40
-
41
- # Concatenate Agent 1 summaries
42
- concatenated_summary = "\n\n".join(agent1_summaries)
43
-
44
- # Agent 2: Final summarization
45
- final_summary = summarize_text(concatenated_summary, final_instructions)
46
-
47
- return final_summary
48
-
 
 
49
  def pdf_summarizer(pdf_file, chunk_instructions, final_instructions):
50
- if pdf_file is None:
51
- return "Please upload a PDF file."
52
-
53
- try:
54
- summary = process_pdf(pdf_file.name, chunk_instructions, final_instructions)
55
- return summary
56
- except Exception as e:
57
- return f"An error occurred: {str(e)}"
58
-
 
 
59
  # Gradio interface
60
  iface = gr.Interface(
61
- fn=pdf_summarizer,
62
- inputs=[
63
- gr.File(label="Upload PDF"),
64
- gr.Textbox(label="Chunk Instructions", placeholder="Instructions for summarizing each chunk"),
65
- gr.Textbox(label="Final Instructions", placeholder="Instructions for final summarization")
66
- ],
67
- outputs=gr.Textbox(label="Summary"),
68
- title="PDF Earnings Summary Generator",
69
- description="Upload a PDF of an earnings summary or transcript to generate a concise summary."
70
  )
71
-
72
  iface.launch()
 
3
  from PyPDF2 import PdfReader
4
  import requests
5
  from dotenv import load_dotenv
 
6
  # Load environment variables
7
  load_dotenv()
 
8
  # Get the Hugging Face API token
9
  HUGGINGFACE_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
10
+ def summarize_text(text, instructions, agent_name):
11
+ print(f"{agent_name}: Starting summarization")
12
+ API_URL = "https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-3-8B-Instruct"
13
+ headers = {"Authorization": f"Bearer {HUGGINGFACE_TOKEN}"}
14
+ payload = {
15
+ "inputs": f"{instructions}\n\nText to summarize:\n{text}",
16
+ "parameters": {"max_length": 500}
17
+ }
18
+ print(f"{agent_name}: Sending request to API")
19
+ response = requests.post(API_URL, headers=headers, json=payload)
20
+ print(f"{agent_name}: Received response from API")
21
+ return response.json()[0]["generated_text"]
 
22
  def process_pdf(pdf_file, chunk_instructions, final_instructions):
23
+ print("Starting PDF processing")
24
+ # Read PDF
25
+ reader = PdfReader(pdf_file)
26
+ text = ""
27
+ for page in reader.pages:
28
+ text += page.extract_text() + "\n\n"
29
+ print(f"Extracted {len(reader.pages)} pages from PDF")
30
+ # Chunk the text (simple splitting by pages for this example)
31
+ chunks = text.split("\n\n")
32
+ print(f"Split text into {len(chunks)} chunks")
33
+ # Agent 1: Summarize each chunk
34
+ agent1_summaries = []
35
+ for i, chunk in enumerate(chunks):
36
+ print(f"Agent 1: Processing chunk {i+1}/{len(chunks)}")
37
+ summary = summarize_text(chunk, chunk_instructions, "Agent 1")
38
+ agent1_summaries.append(summary)
39
+ print("Agent 1: Finished processing all chunks")
40
+ # Concatenate Agent 1 summaries
41
+ concatenated_summary = "\n\n".join(agent1_summaries)
42
+ print(f"Concatenated Agent 1 summaries (length: {len(concatenated_summary)})")
43
+ # Agent 2: Final summarization
44
+ print("Agent 2: Starting final summarization")
45
+ final_summary = summarize_text(concatenated_summary, final_instructions, "Agent 2")
46
+ print("Agent 2: Finished final summarization")
47
+ return final_summary
48
  def pdf_summarizer(pdf_file, chunk_instructions, final_instructions):
49
+ if pdf_file is None:
50
+ print("Error: No PDF file uploaded")
51
+ return "Please upload a PDF file."
52
+ try:
53
+ print(f"Starting summarization process for file: {pdf_file.name}")
54
+ summary = process_pdf(pdf_file.name, chunk_instructions, final_instructions)
55
+ print("Summarization process completed successfully")
56
+ return summary
57
+ except Exception as e:
58
+ print(f"An error occurred: {str(e)}")
59
+ return f"An error occurred: {str(e)}"
60
  # Gradio interface
61
  iface = gr.Interface(
62
+ fn=pdf_summarizer,
63
+ inputs=[
64
+ gr.File(label="Upload PDF"),
65
+ gr.Textbox(label="Chunk Instructions", placeholder="Instructions for summarizing each chunk"),
66
+ gr.Textbox(label="Final Instructions", placeholder="Instructions for final summarization")
67
+ ],
68
+ outputs=gr.Textbox(label="Summary"),
69
+ title="PDF Earnings Summary Generator",
70
+ description="Upload a PDF of an earnings summary or transcript to generate a concise summary."
71
  )
72
+ print("Launching Gradio interface")
73
  iface.launch()