erayman09 committed on
Commit
1120d0b
·
verified ·
1 Parent(s): 001afe1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -46
app.py CHANGED
@@ -1,18 +1,17 @@
1
  import gradio as gr
2
- from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
- from transformers import pipeline
4
  from PIL import Image
5
  from PyPDF2 import PdfReader
6
  from reportlab.lib.pagesizes import letter
7
  from reportlab.pdfgen import canvas
8
 
9
# Image-to-text pipeline; presumably consumed by extract_text for image uploads.
# NOTE(review): the checkpoint is named "image-captioning", not OCR — confirm
# it really transcribes printed text before relying on its output.
ocr_model = pipeline(
    task="image-to-text",
    model="nlpconnect/vit-gpt2-image-captioning",
)

# Tokenizer and sequence-classification model, both loaded from the same
# Bio_ClinicalBERT checkpoint.
# NOTE(review): loading a base checkpoint through
# AutoModelForSequenceClassification presumably leaves the classification head
# untrained — confirm the scores are meaningful.
model_name = "emilyalsentzer/Bio_ClinicalBERT"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(
    model_name
)
16
 
17
  # Function to extract text from images or PDFs
18
  def extract_text(file_path):
@@ -28,62 +27,69 @@ def extract_text(file_path):
28
  except Exception as e:
29
  return f"Error processing the file: {e}"
30
 
31
- # Function to generate a PDF report
32
def create_pdf_report(analysis, output_path):
    """Render *analysis* as a letter-sized PDF and return the file path.

    A fixed two-line header is drawn at the top of the first page, followed by
    the report text. Lines wider than the printable area are wrapped on word
    boundaries, and a new page is started whenever the cursor reaches the
    bottom margin, so long reports are no longer drawn off the page.

    Args:
        analysis: Report text; newline characters separate lines.
        output_path: Destination path of the PDF file.

    Returns:
        ``output_path``, unchanged.
    """
    # Local import: reportlab is already a dependency of this file, but this
    # helper is not part of the module-level imports.
    from reportlab.lib.utils import simpleSplit

    font_name, font_size = "Helvetica", 12
    left_margin, bottom_margin, line_height = 100, 50, 20
    first_line_y = 750
    # Mirror the left margin on the right-hand side.
    max_text_width = letter[0] - 2 * left_margin

    c = canvas.Canvas(output_path, pagesize=letter)
    c.setFont(font_name, font_size)
    c.drawString(left_margin, 750, "Blood Test Report Analysis")
    c.drawString(left_margin, 730, "---------------------------")
    y_position = 700
    for line in analysis.split("\n"):
        # simpleSplit yields no segments for a blank line; keep the blank line
        # so paragraph spacing is preserved.
        segments = simpleSplit(line, font_name, font_size, max_text_width) or [""]
        for segment in segments:
            if y_position < bottom_margin:
                c.showPage()
                # showPage discards graphics state, including the font.
                c.setFont(font_name, font_size)
                y_position = first_line_y
            c.drawString(left_margin, y_position, segment)
            y_position -= line_height
    c.save()
    return output_path
42
 
43
- # Function to analyze blood test reports using Bio_ClinicalBERT
44
def analyze_blood_test(file):
    """Extract text from an uploaded report, classify it, and build a PDF.

    Args:
        file: The uploaded file as delivered by the Gradio ``File`` input;
            forwarded unchanged to ``extract_text``.

    Returns:
        A ``(analysis_report, pdf_path)`` tuple. On failure the first element
        is a human-readable message and ``pdf_path`` is ``None``.
    """
    import tempfile

    import torch  # local import: the file does not import torch at module level

    try:
        # Step 1: Extract text from uploaded file
        extracted_text = extract_text(file)
        if not extracted_text.strip():
            return "No readable text found in the uploaded file.", None
        # extract_text reports failures as a message string instead of raising;
        # surface that message rather than classifying it as report content.
        if extracted_text.startswith("Error processing the file:"):
            return extracted_text, None

        # Step 2: Tokenize and pass through Bio_ClinicalBERT (inference only)
        inputs = tokenizer(
            extracted_text, return_tensors="pt", truncation=True, padding=True
        )
        with torch.no_grad():
            logits = model(**inputs).logits
        probabilities = logits.softmax(dim=-1)[0]

        # Step 3: Each index is an output *class*, not a vocabulary id, so its
        # name comes from the model config rather than from tokenizer.decode().
        id2label = model.config.id2label
        report_lines = ["🔍 Analysis Results:"]
        report_lines.extend(
            f"- {id2label.get(index, f'LABEL_{index}')}: {prob.item():.2f}"
            for index, prob in enumerate(probabilities)
        )
        analysis_report = "\n".join(report_lines) + "\n"

        # Step 4: Generate a PDF report in a per-request file so concurrent
        # users cannot overwrite (or download) each other's reports.
        with tempfile.NamedTemporaryFile(
            prefix="analysis_report_", suffix=".pdf", delete=False
        ) as handle:
            output_pdf = handle.name
        create_pdf_report(
            f"Extracted Text:\n{extracted_text}\n\n{analysis_report}", output_pdf
        )

        return analysis_report, output_pdf
    except Exception as e:
        # UI boundary: report the failure in the textbox instead of crashing.
        return f"Error processing file: {e}", None
73
 
74
# Gradio UI: one file upload in; a text box and a downloadable PDF out.
# Flagging is turned off.
interface = gr.Interface(
    title="MedAI Analyzer",
    description=(
        "Upload your blood test report in image (PNG, JPG, JPEG) or PDF format. "
        "The app will extract and analyze the values, flag abnormalities, and provide health recommendations. "
        "You can also download a detailed PDF report of the analysis."
    ),
    fn=analyze_blood_test,
    inputs=gr.File(
        label="Upload your Blood Test Report (PNG, JPG, JPEG, or PDF)",
    ),
    outputs=[
        gr.Textbox(label="Analysis Results"),
        gr.File(label="Download PDF Report"),
    ],
    allow_flagging="never",
)
 
1
  import gradio as gr
2
+ from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
 
3
  from PIL import Image
4
  from PyPDF2 import PdfReader
5
  from reportlab.lib.pagesizes import letter
6
  from reportlab.pdfgen import canvas
7
 
8
# Image-to-text pipeline; presumably consumed by extract_text for image uploads.
# NOTE(review): the checkpoint is named "image-captioning", not OCR — confirm
# it really transcribes printed text before relying on its output.
ocr_model = pipeline(
    task="image-to-text",
    model="nlpconnect/vit-gpt2-image-captioning",
)

# Tokenizer and sequence-classification model used for prescription
# validation, both loaded from the same checkpoint name.
# NOTE(review): loading a base checkpoint through
# AutoModelForSequenceClassification presumably leaves the classification head
# untrained — confirm the scores are meaningful.
medical_model_name = "microsoft/BioGPT"
medical_tokenizer = AutoTokenizer.from_pretrained(medical_model_name)
medical_model = AutoModelForSequenceClassification.from_pretrained(
    medical_model_name
)
15
 
16
  # Function to extract text from images or PDFs
17
  def extract_text(file_path):
 
27
  except Exception as e:
28
  return f"Error processing the file: {e}"
29
 
30
+ # Function to validate prescription using the medical model
31
def validate_prescription_with_model(extracted_text):
    """Score prescription text with the medical classifier and format a report.

    The text is tokenized with truncation enabled, run through
    ``medical_model``, and the logits are turned into per-class probabilities
    with softmax.

    Args:
        extracted_text: Text to classify.

    Returns:
        A newline-terminated string: a header line followed by one
        ``- <label>: <probability>`` line per output class of the model.
    """
    import torch  # local import: the file does not import torch at module level

    inputs = medical_tokenizer(
        extracted_text, return_tensors="pt", truncation=True, padding=True
    )
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        logits = medical_model(**inputs).logits
    probabilities = logits.softmax(dim=-1)[0]

    # Each index is an output *class*, not a vocabulary id, so its name comes
    # from the model config rather than from tokenizer.decode().
    id2label = medical_model.config.id2label
    report_lines = ["🔍 Prescription Validation Results:"]
    report_lines.extend(
        f"- {id2label.get(index, f'LABEL_{index}')}: {prob.item():.2f}"
        for index, prob in enumerate(probabilities)
    )
    return "\n".join(report_lines) + "\n"
45
+
46
+ # Main function to handle prescription analysis
47
def analyze_prescription(file):
    """Extract text from an uploaded prescription, validate it, and build a PDF.

    Args:
        file: The uploaded file as delivered by the Gradio ``File`` input;
            forwarded unchanged to ``extract_text``.

    Returns:
        A ``(report_text, pdf_path)`` tuple. On failure ``report_text`` is a
        human-readable message and ``pdf_path`` is ``None``.
    """
    import tempfile

    try:
        # Step 1: Extract text
        extracted_text = extract_text(file)
        if not extracted_text.strip():
            return "No readable text found in the uploaded file.", None
        # extract_text reports failures as a message string instead of raising;
        # surface that message rather than validating it as a prescription.
        if extracted_text.startswith("Error processing the file:"):
            return extracted_text, None

        # Step 2: Validate prescription using AI model
        validation_report = validate_prescription_with_model(extracted_text)

        # Combine the extracted text and validation results
        full_report = f"Extracted Text:\n{extracted_text}\n\n{validation_report}"

        # Step 3: Generate a PDF report in a per-request file so concurrent
        # users cannot overwrite (or download) each other's reports.
        with tempfile.NamedTemporaryFile(
            prefix="prescription_validation_report_", suffix=".pdf", delete=False
        ) as handle:
            output_pdf = handle.name
        create_pdf_report(full_report, output_pdf)

        return full_report, output_pdf
    except Exception as e:
        # UI boundary: report the failure in the textbox instead of crashing.
        return f"Error processing file: {e}", None
67
 
68
+ # Function to create a PDF report
69
def create_pdf_report(content, output_path):
    """Render *content* as a letter-sized PDF and return the file path.

    A fixed two-line header is drawn at the top of the first page, followed by
    the report text. Lines wider than the printable area are wrapped on word
    boundaries, and a new page is started whenever the cursor reaches the
    bottom margin, so long reports are no longer drawn off the page.

    Args:
        content: Report text; newline characters separate lines.
        output_path: Destination path of the PDF file.

    Returns:
        ``output_path``, unchanged.
    """
    # Local import: reportlab is already a dependency of this file, but this
    # helper is not part of the module-level imports.
    from reportlab.lib.utils import simpleSplit

    font_name, font_size = "Helvetica", 12
    left_margin, bottom_margin, line_height = 100, 50, 20
    first_line_y = 750
    # Mirror the left margin on the right-hand side.
    max_text_width = letter[0] - 2 * left_margin

    c = canvas.Canvas(output_path, pagesize=letter)
    c.setFont(font_name, font_size)
    c.drawString(left_margin, 750, "Prescription Validation Report")
    c.drawString(left_margin, 730, "------------------------------")
    y_position = 700
    for line in content.split("\n"):
        # simpleSplit yields no segments for a blank line; keep the blank line
        # so paragraph spacing is preserved.
        segments = simpleSplit(line, font_name, font_size, max_text_width) or [""]
        for segment in segments:
            if y_position < bottom_margin:
                c.showPage()
                # showPage discards graphics state, including the font.
                c.setFont(font_name, font_size)
                y_position = first_line_y
            c.drawString(left_margin, y_position, segment)
            y_position -= line_height
    c.save()
    return output_path
79
+
80
# Gradio UI: one file upload in; a text box and a downloadable PDF out.
# Flagging is turned off.
interface = gr.Interface(
    title="AI-Powered Prescription Validator",
    description=(
        "Upload your medical prescription in image (PNG, JPG, JPEG) or PDF format. "
        "The app will extract the text, analyze it using advanced AI models, and validate the prescription. "
        "Download a comprehensive PDF report of the validation results."
    ),
    fn=analyze_prescription,
    inputs=gr.File(
        label="Upload your Prescription (PNG, JPG, JPEG, or PDF)",
    ),
    outputs=[
        gr.Textbox(label="Validation Results"),
        gr.File(label="Download PDF Report"),
    ],
    allow_flagging="never",
)