Spaces:

vvinayakkkkk
/

pdf_image

Sleeping

App Files Files Community

pdf_image / app.py

vvinayakkkkk

Update app.py

abcce98 verified 5 months ago

raw

history blame contribute delete

9.2 kB

	import streamlit as st
	import fitz # PyMuPDF
	import io
	from PIL import Image
	import google.generativeai as genai
	from dotenv import load_dotenv
	import os

	# Load environment variables
	load_dotenv()

	# Initialize Gemini model
	genai.configure(api_key=os.getenv('GOOGLE_API_KEY'))
	gemini_model = genai.GenerativeModel('gemini-2.0-flash-exp')

	class SequentialDrawingAnalyzer:
	def __init__(self):
	self.drawings_list = [] # Store all extracted images info
	self.analyzed_drawings = [] # Store analyzed results

	def extract_page_as_image(self, page):
	"""Extract entire page as an image"""
	try:
	# Get the page's pixmap
	zoom = 2 # Increase resolution
	mat = fitz.Matrix(zoom, zoom)
	pix = page.get_pixmap(matrix=mat)

	# Convert pixmap to PIL Image
	img_data = pix.tobytes("png")
	return img_data

	except Exception as e:
	st.warning(f"Could not extract page as image: {str(e)}")
	return None

	def extract_drawings_list(self, pdf_bytes):
	"""First pass: Extract all drawings from PDF and create a list"""
	try:
	doc = fitz.open(stream=pdf_bytes, filetype="pdf")

	for page_num in range(len(doc)):
	page = doc[page_num]
	image_list = page.get_images()

	if len(image_list) > 0:
	# Process individual images if found
	for img_idx, img_info in enumerate(image_list):
	try:
	xref = img_info[0]
	base_image = doc.extract_image(xref)
	image_bytes = base_image["image"]

	self.drawings_list.append({
	'page': page_num + 1,
	'drawing_number': img_idx + 1,
	'xref': xref,
	'image_bytes': image_bytes,
	'type': 'embedded'
	})

	except Exception as img_error:
	st.warning(f"Could not extract drawing {img_idx + 1} on page {page_num + 1}: {str(img_error)}")
	else:
	# If no images found, extract entire page as image
	page_image = self.extract_page_as_image(page)
	if page_image:
	self.drawings_list.append({
	'page': page_num + 1,
	'drawing_number': 1,
	'image_bytes': page_image,
	'type': 'full_page'
	})
	st.info(f"Extracted page {page_num + 1} as full-page drawing")

	doc.close()
	return len(self.drawings_list)

	except Exception as e:
	st.error(f"Error extracting drawings: {str(e)}")
	return 0

	def analyze_drawing(self, drawing_info):
	"""Analyze a single drawing"""
	try:
	image = Image.open(io.BytesIO(drawing_info['image_bytes']))

	drawing_type = "full page" if drawing_info.get('type') == 'full_page' else "embedded"

	engineering_prompt = f"""
	Analyze this engineering drawing in detail ({drawing_type} drawing). Please provide:
	1. Drawing Type and Purpose
	- Identify the type of drawing (assembly, detail, section view, etc.)
	- Main purpose and function of the depicted component/system

	2. Dimensional Analysis
	- Key dimensions and measurements
	- Scale and proportions
	- Tolerances if specified

	3. Component Details
	- List all visible components and parts
	- Materials specifications if indicated
	- Surface finish markings

	4. Technical Specifications
	- Any technical notes or special instructions
	- Welding symbols or special instructions
	- Reference standards mentioned

	5. Critical Features
	- Important geometric features
	- Key interfaces or connections
	- Safety-critical aspects
	"""

	response = gemini_model.generate_content([
	engineering_prompt,
	image
	])

	return {
	'page': drawing_info['page'],
	'drawing_number': drawing_info['drawing_number'],
	'image': image,
	'analysis': response.text,
	'type': drawing_info.get('type', 'embedded')
	}

	except Exception as e:
	st.error(f"Error analyzing drawing {drawing_info['drawing_number']}: {str(e)}")
	return None

	# Streamlit UI
	st.title("Sequential Engineering Drawing Analyzer")

	# Initialize session state
	if "processed" not in st.session_state:
	st.session_state.processed = False
	if "analyzer" not in st.session_state:
	st.session_state.analyzer = SequentialDrawingAnalyzer()
	if "current_analysis_index" not in st.session_state:
	st.session_state.current_analysis_index = 0
	if "analyzed_drawings" not in st.session_state:
	st.session_state.analyzed_drawings = []

	# File upload
	pdf_file = st.file_uploader("Upload PDF containing engineering drawings", type="pdf")

	if pdf_file is not None:
	# First pass: Extract all drawings if not already processed
	if not st.session_state.processed:
	try:
	with st.spinner("Extracting drawings from PDF..."):
	pdf_bytes = pdf_file.getvalue()
	total_drawings = st.session_state.analyzer.extract_drawings_list(pdf_bytes)
	st.session_state.processed = True

	st.success(f"Found {total_drawings} drawings in the PDF!")

	# Display list of all drawings
	st.subheader("List of Extracted Drawings:")
	for drawing in st.session_state.analyzer.drawings_list:
	drawing_type = "Full Page" if drawing.get('type') == 'full_page' else "Embedded"
	st.write(f"{drawing_type} Drawing {drawing['drawing_number']} on Page {drawing['page']}")

	st.markdown("---")

	except Exception as e:
	st.error(f"Failed to process PDF: {str(e)}")
	st.session_state.processed = False

	# Process drawings sequentially
	if st.session_state.processed:
	remaining_drawings = min(5, len(st.session_state.analyzer.drawings_list)) - st.session_state.current_analysis_index

	if remaining_drawings > 0:
	st.subheader(f"Analyzing Drawing {st.session_state.current_analysis_index + 1} of {min(5, len(st.session_state.analyzer.drawings_list))}")

	# Analyze current drawing
	current_drawing = st.session_state.analyzer.drawings_list[st.session_state.current_analysis_index]

	with st.spinner(f"Analyzing drawing {current_drawing['drawing_number']} from page {current_drawing['page']}..."):
	analysis_result = st.session_state.analyzer.analyze_drawing(current_drawing)

	if analysis_result:
	# Store analysis result
	st.session_state.analyzed_drawings.append(analysis_result)

	# Increment counter
	st.session_state.current_analysis_index += 1

	# Auto-rerun to process next drawing
	if remaining_drawings > 1:
	st.rerun()
	else:
	st.success("Completed analysis of first 5 drawings!")

	elif len(st.session_state.analyzer.drawings_list) > 5:
	st.info("First 5 drawings have been analyzed. Reload the page to analyze a different set of drawings.")

	# Display all analyzed drawings
	if st.session_state.analyzed_drawings:
	st.subheader("Analyzed Drawings:")
	for analysis in st.session_state.analyzed_drawings:
	col1, col2 = st.columns([1, 1])

	with col1:
	drawing_type = "Full Page" if analysis['type'] == 'full_page' else "Embedded"
	st.image(analysis['image'],
	use_column_width=True,
	caption=f"{drawing_type} Drawing {analysis['drawing_number']} (Page {analysis['page']})")

	with col2:
	st.markdown("### Analysis Results")
	st.markdown(analysis['analysis'])

	st.markdown("---")
	else:
	st.info("Please upload a PDF file containing engineering drawings to begin analysis.")