# NOTE: the original first lines ("Spaces: Sleeping Sleeping") were page-scrape residue, not part of the program.
import streamlit as st | |
import pandas as pd | |
import re | |
from datetime import datetime | |
import fitz # PyMuPDF | |
import plotly.express as px | |
import io | |
def extract_data_from_pdf(pdf_path):
    """Extract sterilization measurement rows from a PDF protocol.

    Every page is scanned for the header fields ``Entreprise::``, ``Date:``,
    ``Utilisateur:`` and ``Produit:``; pages missing any of them are skipped.
    On the remaining pages, each measurement line (a step label that is either
    ``Début`` or ``+ <n> Min.``, two ``<n>°C`` temperatures and an F value
    written with a decimal comma) becomes one row.

    Args:
        pdf_path: A filesystem path, raw PDF ``bytes``/``bytearray``, or a
            binary file-like object exposing ``getvalue()`` or ``read()``
            (such as an in-memory upload).

    Returns:
        A ``pandas.DataFrame`` with the columns ``Date`` (ISO ``YYYY-MM-DD``
        string), ``Entreprise``, ``Utilisateur``, ``Produit``, ``Déroulement``,
        ``Temp. du stérilisateur``, ``Temp. à coeur`` and ``Valeur F``.
        The columns are present even when no row was found.

    Raises:
        ValueError: If a page containing measurements carries a date that
            matches ``dd.mm.yyyy`` syntactically but is not a valid calendar
            date.
    """
    columns = [
        'Date',
        'Entreprise',
        'Utilisateur',
        'Produit',
        'Déroulement',
        'Temp. du stérilisateur',
        'Temp. à coeur',
        'Valeur F',
    ]

    # In-memory sources must be handed to PyMuPDF through ``stream=``; passing
    # a file-like object as the positional filename does not read its content.
    if hasattr(pdf_path, 'getvalue'):
        payload = pdf_path.getvalue()  # independent of the current read position
    elif hasattr(pdf_path, 'read'):
        payload = pdf_path.read()
    elif isinstance(pdf_path, (bytes, bytearray)):
        payload = bytes(pdf_path)
    else:
        payload = None

    if payload is None:
        pdf_document = fitz.open(pdf_path)
    else:
        pdf_document = fitz.open(stream=payload, filetype='pdf')

    # Always release the document, even if text extraction fails midway.
    try:
        extracted_texts = [page.get_text() for page in pdf_document]
    finally:
        pdf_document.close()

    # Measurement line: step label, sterilizer temp, core temp, F value.
    # The label alternative is the literal word "Début" (the previous
    # ``\Début`` was parsed as ``\D`` + "ébut", i.e. any non-digit + "ébut").
    pattern = re.compile(
        r'(\+\s\d+\sMin\.|Début)\s*(\d+°C)\s*(\d+°C)\s*(\d{2},\d{2})'
    )
    # NOTE(review): the double colon after "Entreprise" is kept as found;
    # confirm it matches the PDF layout.
    company_re = re.compile(r'Entreprise::\s*(.+)')
    date_re = re.compile(r'Date:\s*(\d{2}[./]\d{2}[./]\d{4})')
    user_re = re.compile(r'Utilisateur:\s*(.+)')
    product_re = re.compile(r'Produit:\s*(.+)')

    structured_data = []
    for page_text in extracted_texts:
        company_match = company_re.search(page_text)
        date_match = date_re.search(page_text)
        user_match = user_re.search(page_text)
        product_match = product_re.search(page_text)
        if not all([company_match, date_match, user_match, product_match]):
            continue  # Skip pages without metadata

        matches = pattern.findall(page_text)
        if not matches:
            continue  # Nothing to record; also avoids parsing the date needlessly

        company = company_match.group(1).strip()
        user = user_match.group(1).strip()
        product = product_match.group(1).strip()

        # Normalise "dd/mm/yyyy" to "dd.mm.yyyy", then convert once per page.
        raw_date = date_match.group(1).replace('/', '.').strip()
        iso_date = datetime.strptime(raw_date, '%d.%m.%Y').strftime('%Y-%m-%d')

        for step, sterilizer_temp, core_temp, f_value in matches:
            structured_data.append({
                'Date': iso_date,
                'Entreprise': company,
                'Utilisateur': user,
                'Produit': product,
                'Déroulement': step.strip(),
                'Temp. du stérilisateur': float(sterilizer_temp.replace('°C', '')),
                'Temp. à coeur': float(core_temp.replace('°C', '')),
                'Valeur F': float(f_value.replace(',', '.')),
            })

    # Passing ``columns`` keeps the schema stable when no row was extracted,
    # so callers can group/filter on column names without a KeyError.
    return pd.DataFrame(structured_data, columns=columns)
def analyze_sterilization(data):
    """Summarise, per product, whether the sterilization criteria were met.

    Rows are grouped by ``Produit``. For each group the function counts the
    rows whose ``Temp. à coeur`` reaches the required temperature (108 when
    the product name contains ``NutaBreizh``, otherwise 103) and flags the
    criteria as met when that count is at least 30.

    NOTE(review): the count is a number of rows; it is reported as minutes on
    the assumption that the protocol logs one measurement per minute —
    confirm against the PDF format. Grouping is by product only, so rows of
    the same product from different dates are counted together and the first
    row's date/user are reported.

    Args:
        data: DataFrame with at least the columns ``Produit``, ``Date``,
            ``Utilisateur``, ``Temp. du stérilisateur`` and ``Temp. à coeur``.
            ``None`` or an empty frame is accepted.

    Returns:
        A ``pandas.DataFrame`` with one row per product and the columns
        ``Date``, ``Produit``, ``Utilisateur``, ``Temperature_Requise``,
        ``Minutes_Temperature_Requise``, ``Temperature_Max_Sterilisateur``,
        ``Temperature_Max_Coeur`` and ``Criteres_Respectes``. The frame is
        empty (but still has these columns) when there is no input row.
    """
    result_columns = [
        'Date',
        'Produit',
        'Utilisateur',
        'Temperature_Requise',
        'Minutes_Temperature_Requise',
        'Temperature_Max_Sterilisateur',
        'Temperature_Max_Coeur',
        'Criteres_Respectes',
    ]

    # An empty frame may have no columns at all, in which case grouping on
    # 'Produit' would raise KeyError; there is nothing to analyse anyway.
    if data is None or data.empty:
        return pd.DataFrame(columns=result_columns)

    nutabreizh_temp = 108
    default_temp = 103
    required_minutes = 30

    results = []
    for product, group in data.groupby('Produit'):
        # The required core temperature depends on the product family.
        required_temp = nutabreizh_temp if 'NutaBreizh' in product else default_temp

        core_temps = group['Temp. à coeur']
        minutes_at_temp = int((core_temps >= required_temp).sum())

        results.append({
            'Date': group['Date'].iloc[0],
            'Produit': product,
            'Utilisateur': group['Utilisateur'].iloc[0],
            'Temperature_Requise': required_temp,
            'Minutes_Temperature_Requise': minutes_at_temp,
            'Temperature_Max_Sterilisateur': group['Temp. du stérilisateur'].max(),
            'Temperature_Max_Coeur': core_temps.max(),
            'Criteres_Respectes': minutes_at_temp >= required_minutes,
        })

    return pd.DataFrame(results, columns=result_columns)
def main():
    """Run the Streamlit page: upload a PDF, analyse it, plot and export.

    The uploaded PDF is parsed with ``extract_data_from_pdf`` and summarised
    with ``analyze_sterilization``. The summary is shown as a table and a
    scatter plot, and offered as an Excel download. When the PDF yields no
    usable rows a warning is displayed instead.
    """
    st.title("Analyse des Protocoles de Stérilisation")
    uploaded_file = st.file_uploader("Choisir un fichier PDF", type="pdf")
    if uploaded_file is None:
        return

    # Process data
    data = extract_data_from_pdf(uploaded_file)
    if data.empty:
        # Nothing was extracted: analysing a frame without rows/columns would
        # fail, so report it to the user and stop here.
        st.warning("Aucune donnée valide à afficher.")
        return

    results_df = analyze_sterilization(data)

    # Display results
    st.subheader("Résultats de l'analyse")
    st.dataframe(results_df)

    if results_df.empty:
        st.warning("Aucune donnée valide à afficher.")
        return

    # Create visualization
    fig = px.scatter(
        results_df,
        x='Date',
        y='Minutes_Temperature_Requise',
        color='Criteres_Respectes',
        hover_data=['Produit', 'Temperature_Requise'],
        title="Minutes à température requise par production",
    )
    st.plotly_chart(fig)

    # Export: the download button is rendered directly rather than inside an
    # ``if st.button(...)`` branch, because the rerun triggered by clicking a
    # nested button resets the outer button and hides the inner one.
    output = io.BytesIO()
    with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
        results_df.to_excel(writer, index=False)
    st.download_button(
        label="Télécharger l'analyse",
        data=output.getvalue(),
        file_name="analyse_sterilisation.xlsx",
        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    )


# Only start the app when this file is executed as a script.
if __name__ == "__main__":
    main()