"""Streamlit app that analyses sterilization protocols uploaded as PDF files.

The app extracts the measurement rows from each page of the PDF, checks for
every production whether the core temperature stayed at or above the required
temperature long enough, and displays / exports the outcome.
"""

import io
import re
from datetime import datetime

import fitz  # PyMuPDF
import pandas as pd
import plotly.express as px
import streamlit as st

# One measurement row: elapsed-time label, sterilizer temperature, core
# temperature and F value (two digits, decimal comma, two digits).
# The label is either "+ <n> Min." or the literal word "Début".
_MEASUREMENT_PATTERN = re.compile(
    r'(\+\s\d+\sMin\.|Début)\s*(\d+°C)\s*(\d+°C)\s*(\d{2},\d{2})'
)

# Page-level metadata fields.
# NOTE(review): the double colon after "Entreprise" is kept exactly as in the
# original pattern; presumably it mirrors the PDF layout — confirm.
_COMPANY_PATTERN = re.compile(r'Entreprise::\s*(.+)')
_DATE_PATTERN = re.compile(r'Date:\s*(\d{2}[./]\d{2}[./]\d{4})')
_USER_PATTERN = re.compile(r'Utilisateur:\s*(.+)')
_PRODUCT_PATTERN = re.compile(r'Produit:\s*(.+)')

# Column layouts, declared once so that empty results keep a stable schema.
_DATA_COLUMNS = [
    'Date',
    'Entreprise',
    'Utilisateur',
    'Produit',
    'Déroulement',
    'Temp. du stérilisateur',
    'Temp. à coeur',
    'Valeur F',
]
_RESULT_COLUMNS = [
    'Date',
    'Produit',
    'Utilisateur',
    'Temperature_Requise',
    'Minutes_Temperature_Requise',
    'Temperature_Max_Sterilisateur',
    'Temperature_Max_Coeur',
    'Criteres_Respectes',
]

# Required core temperature (°C) depending on the product name, and the
# minimum number of qualifying measurement rows.
_NUTABREIZH_MARKER = 'NutaBreizh'
_REQUIRED_TEMP_NUTABREIZH = 108
_REQUIRED_TEMP_DEFAULT = 103
_MIN_MINUTES_AT_TEMP = 30


def extract_data_from_pdf(pdf_bytes: bytes) -> pd.DataFrame:
    """Extract the measurement rows of a sterilization protocol PDF.

    Pages on which the company, date, user or product field cannot be found
    are skipped entirely.

    Args:
        pdf_bytes: Raw content of the PDF file.

    Returns:
        A DataFrame with one row per measurement line and the columns
        'Date' (ISO ``YYYY-MM-DD`` string), 'Entreprise', 'Utilisateur',
        'Produit', 'Déroulement', 'Temp. du stérilisateur', 'Temp. à coeur'
        and 'Valeur F'. The columns are present even when no row was found.

    Raises:
        ValueError: If a page with measurement rows carries a date that
            matches the ``dd.mm.yyyy`` layout but is not a valid calendar
            date.
    """
    # The context manager closes the document even if text extraction fails.
    with fitz.open(stream=pdf_bytes, filetype="pdf") as pdf_document:
        page_texts = [page.get_text() for page in pdf_document]

    rows = []
    for page_text in page_texts:
        company_match = _COMPANY_PATTERN.search(page_text)
        date_match = _DATE_PATTERN.search(page_text)
        user_match = _USER_PATTERN.search(page_text)
        product_match = _PRODUCT_PATTERN.search(page_text)
        if not all([company_match, date_match, user_match, product_match]):
            continue  # Skip pages without metadata

        measurements = _MEASUREMENT_PATTERN.findall(page_text)
        if not measurements:
            continue

        company = company_match.group(1).strip()
        user = user_match.group(1).strip()
        product = product_match.group(1).strip()

        # Normalise "dd/mm/yyyy" to "dd.mm.yyyy", then convert once per page
        # instead of once per measurement row.
        raw_date = date_match.group(1).replace('/', '.').strip()
        iso_date = datetime.strptime(raw_date, '%d.%m.%Y').strftime('%Y-%m-%d')

        for elapsed, sterilizer_temp, core_temp, f_value in measurements:
            rows.append({
                'Date': iso_date,
                'Entreprise': company,
                'Utilisateur': user,
                'Produit': product,
                'Déroulement': elapsed.strip(),
                'Temp. du stérilisateur': float(sterilizer_temp.replace('°C', '')),
                'Temp. à coeur': float(core_temp.replace('°C', '')),
                'Valeur F': float(f_value.replace(',', '.')),
            })

    return pd.DataFrame(rows, columns=_DATA_COLUMNS)


def analyze_sterilization(data: pd.DataFrame) -> pd.DataFrame:
    """Check the sterilization criteria for every production in *data*.

    A production is the set of rows sharing the same 'Date' and 'Produit'.
    Grouping on both keeps runs of the same product on different dates from
    being added together.

    The required core temperature is 108 when the product name contains
    'NutaBreizh' and 103 otherwise. The criteria are met when at least 30
    rows have a core temperature at or above the required one.

    NOTE(review): the number of qualifying rows is reported as minutes, which
    presumes one measurement row per minute — confirm against the protocol.

    Args:
        data: Measurement rows as returned by ``extract_data_from_pdf``.

    Returns:
        A DataFrame with one row per production and the columns 'Date',
        'Produit', 'Utilisateur', 'Temperature_Requise',
        'Minutes_Temperature_Requise', 'Temperature_Max_Sterilisateur',
        'Temperature_Max_Coeur' and 'Criteres_Respectes'. The columns are
        present even when *data* is empty.
    """
    if data.empty:
        # An empty frame may lack the grouping columns altogether.
        return pd.DataFrame(columns=_RESULT_COLUMNS)

    results = []
    for (date, product), group in data.groupby(['Date', 'Produit']):
        if _NUTABREIZH_MARKER in product:
            required_temp = _REQUIRED_TEMP_NUTABREIZH
        else:
            required_temp = _REQUIRED_TEMP_DEFAULT

        core_temps = group['Temp. à coeur']
        minutes_at_temp = int((core_temps >= required_temp).sum())

        results.append({
            'Date': date,
            'Produit': product,
            'Utilisateur': group['Utilisateur'].iloc[0],
            'Temperature_Requise': required_temp,
            'Minutes_Temperature_Requise': minutes_at_temp,
            'Temperature_Max_Sterilisateur': group['Temp. du stérilisateur'].max(),
            'Temperature_Max_Coeur': core_temps.max(),
            'Criteres_Respectes': minutes_at_temp >= _MIN_MINUTES_AT_TEMP,
        })

    return pd.DataFrame(results, columns=_RESULT_COLUMNS)


def main() -> None:
    """Render the Streamlit page: upload, analysis, chart and Excel export."""
    st.title("Analyse des Protocoles de Stérilisation")

    uploaded_file = st.file_uploader("Choisir un fichier PDF", type="pdf")
    if uploaded_file is None:
        return

    data = extract_data_from_pdf(uploaded_file.getvalue())
    if data.empty:
        # Nothing could be extracted: report it instead of failing later on
        # missing columns.
        st.warning("Aucune donnée valide à afficher.")
        return

    results_df = analyze_sterilization(data)

    st.subheader("Résultats de l'analyse")
    st.dataframe(results_df)

    criteria_met = results_df['Criteres_Respectes'].astype(bool)
    failed_products = results_df[~criteria_met]
    if not failed_products.empty:
        st.warning("Attention : Certains produits n'ont pas respecté les critères de stérilisation.")
        st.subheader("Produits n'ayant pas respecté les critères")
        st.dataframe(failed_products)

    fig = px.scatter(
        results_df,
        x='Date',
        y='Minutes_Temperature_Requise',
        color='Criteres_Respectes',
        hover_data=['Produit', 'Temperature_Requise'],
        title="Minutes à température requise par production",
    )
    st.plotly_chart(fig)

    # NOTE(review): the download button only exists during the rerun
    # triggered by the export button; kept as in the original flow.
    if st.button("Exporter en Excel"):
        output = io.BytesIO()
        with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
            results_df.to_excel(writer, index=False)
        output.seek(0)

        st.download_button(
            label="Télécharger l'analyse",
            data=output,
            file_name="analyse_sterilisation.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        )


if __name__ == "__main__":
    main()