Ahmed235 committed on
Commit
5bc6c98
1 Parent(s): c0599d5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -23
app.py CHANGED
@@ -4,54 +4,63 @@ from pptx import Presentation
4
  import re
5
  import json
6
 
7
# Create a text classification pipeline
# NOTE(review): constructing these at module import downloads/loads both model
# weights eagerly — the app pays the full load cost at startup.
classifier = pipeline("text-classification", model="Ahmed235/roberta_classification", tokenizer="Ahmed235/roberta_classification")
# Summarization pipeline used to condense the extracted slide text.
summarizer = pipeline("summarization", model="Falconsai/text_summarization")
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
def extract_text_from_pptx(file_path):
    """Extract all text from a .pptx file.

    Iterates every shape on every slide and collects its ``text``
    attribute (only shapes that expose one).

    Args:
        file_path: Path to the PowerPoint (.pptx) file.

    Returns:
        All shape texts joined with newlines, in slide order.
    """
    presentation = Presentation(file_path)
    text = []
    # Fix: the original enumerated slides into an unused `slide_number`
    # local; iterate the slides directly instead.
    for slide in presentation.slides:
        for shape in slide.shapes:
            if hasattr(shape, "text"):
                text.append(shape.text)
    return "\n".join(text)
 
 
 
 
19
 
 
20
def limit_text_length(text, max_length=512):
    """Return at most the first *max_length* characters of *text*."""
    return text[0:max_length]
23
 
 
24
def predict_pptx_content(file_path):
    """Classify and summarize the text content of a .pptx file.

    Args:
        file_path: Path to the uploaded PowerPoint file.

    Returns:
        On success, a JSON string with keys "predicted_label",
        "evaluation" (classifier score), and "summary".
        On failure, a plain dict {"error": str} — NOTE(review): the
        success/failure return types differ (str vs dict); confirm the
        Gradio output component accepts both.
    """
    try:
        extracted_text = extract_text_from_pptx(file_path)
        # Collapse all runs of whitespace (incl. newlines between shapes)
        # into single spaces.
        cleaned_text = re.sub(r'\s+', ' ', extracted_text)
        # Classifier input is truncated to 512 characters; the summarizer
        # below still receives the full cleaned text.
        limited_text = limit_text_length(cleaned_text)
        result = classifier(limited_text)
        predicted_label = result[0]['label']
        predicted_probability = result[0]['score']
        # NOTE(review): max_length=1000 may exceed the input length for
        # short decks — confirm the summarizer handles that gracefully.
        summary = summarizer(cleaned_text, max_length=1000, min_length=30, do_sample=False)[0]['summary_text']
        output = {
            "predicted_label": predicted_label,
            "evaluation": predicted_probability,
            "summary": summary
        }
        output_dict = json.dumps(output, indent = 3)
        return output_dict
    except Exception as e:
        # Log the error details
        print(f"Error in predict_pptx_content: {e}")
        return {"error": str(e)}
53
 
54
-
55
  iface = gr.Interface(
56
  fn=predict_pptx_content,
57
  inputs=gr.File(type="filepath", label="Upload PowerPoint (.pptx) file"),
 
4
  import re
5
  import json
6
 
7
# Load the classification and summarization pipelines
# NOTE(review): both pipelines are built eagerly at import time, so model
# weights are loaded before the first request arrives.
classifier = pipeline("text-classification", model="Ahmed235/roberta_classification", tokenizer="Ahmed235/roberta_classification")
summarizer = pipeline("summarization", model="Falconsai/text_summarization")
10
 
11
# Cache for model weights.
# Fix: these flags start True because `classifier` and `summarizer` are
# already constructed at module level above; starting them as False made
# load_models() rebuild BOTH pipelines on the first prediction — a slow,
# redundant double load of the model weights.
classification_model_loaded = True
summarization_model_loaded = True

def load_models():
    """Rebuild either pipeline whose cache flag has been reset.

    No-op in the common case (both flags True after import). Mutates the
    module globals ``classifier`` / ``summarizer`` and their flags.
    """
    global classifier, summarizer, classification_model_loaded, summarization_model_loaded
    if not classification_model_loaded:
        classifier = pipeline("text-classification", model="Ahmed235/roberta_classification", tokenizer="Ahmed235/roberta_classification")
        classification_model_loaded = True
    if not summarization_model_loaded:
        summarizer = pipeline("summarization", model="Falconsai/text_summarization")
        summarization_model_loaded = True
23
+
24
# Extract text from PowerPoint
def extract_text_from_pptx(file_path):
    """Gather the text of every shape on every slide, newline-joined.

    Returns "" (after logging) if the file cannot be opened or parsed.
    """
    try:
        slides = Presentation(file_path).slides
        collected = [
            shape.text
            for slide in slides
            for shape in slide.shapes
            if hasattr(shape, "text")
        ]
        return "\n".join(collected)
    except Exception as e:
        print(f"Error extracting text from PowerPoint: {e}")
        return ""
37
 
38
# Limit text length
def limit_text_length(text, max_length=512):
    """Clamp *text* to at most *max_length* characters."""
    if len(text) <= max_length:
        return text
    return text[:max_length]
41
 
42
# Predict content from PowerPoint
def predict_pptx_content(file_path):
    """Classify and summarize the text extracted from a .pptx file.

    Args:
        file_path: Path to the uploaded PowerPoint file.

    Returns:
        On success, a JSON string with "predicted_label", "evaluation"
        (classifier score), and "summary". On failure, a plain dict
        {"error": str} — NOTE(review): success and failure return
        different types (str vs dict); confirm the Gradio output
        component tolerates both.
    """
    try:
        load_models()  # Load models if not loaded already
        extracted_text = extract_text_from_pptx(file_path)
        # Collapse all whitespace runs (incl. newlines between shapes)
        # into single spaces.
        cleaned_text = re.sub(r'\s+', ' ', extracted_text)
        # Only the classifier input is truncated (512 chars); the
        # summarizer below still sees the full cleaned text.
        limited_text = limit_text_length(cleaned_text)
        result = classifier(limited_text)
        predicted_label = result[0]['label']
        predicted_probability = result[0]['score']
        # NOTE(review): max_length=1000 can exceed the input length for
        # short decks — confirm the summarizer handles that case.
        summary = summarizer(cleaned_text, max_length=1000, min_length=30, do_sample=False)[0]['summary_text']
        output = {
            "predicted_label": predicted_label,
            "evaluation": predicted_probability,
            "summary": summary
        }
        return json.dumps(output, indent=3)
    except Exception as e:
        print(f"Error predicting content from PowerPoint: {e}")
        return {"error": str(e)}
62
 
63
+ # Gradio interface
64
  iface = gr.Interface(
65
  fn=predict_pptx_content,
66
  inputs=gr.File(type="filepath", label="Upload PowerPoint (.pptx) file"),