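# KERMIT2025 / app.py
# Source: Hugging Face file viewer (commit c291e47, "Update app.py", 4.72 kB).
# The viewer chrome that preceded the code (avatar caption, "raw",
# "history blame") was page residue and is kept here only as this comment.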
import streamlit as st
import pandas as pd
import re
from datetime import datetime
import fitz # PyMuPDF
import plotly.express as px
import io
def extract_data_from_pdf(pdf_path):
    """Parse a sterilization protocol PDF into one row per temperature reading.

    Args:
        pdf_path: Filesystem path to the PDF, the raw PDF content as
            ``bytes``/``bytearray``, or a binary file-like object exposing
            ``read()`` (read from its current position).

    Returns:
        A ``pandas.DataFrame`` with the columns ``Date`` (ISO ``YYYY-MM-DD``
        string), ``Entreprise``, ``Utilisateur``, ``Produit``,
        ``Déroulement``, ``Temp. du stérilisateur``, ``Temp. à coeur`` and
        ``Valeur F``. Pages missing any of the four metadata fields are
        skipped. The columns are present even when no row was extracted.

    Raises:
        ValueError: If a page with readings carries a date that matches the
            ``dd.mm.yyyy`` shape but is not a valid calendar date.
    """
    columns = [
        'Date',
        'Entreprise',
        'Utilisateur',
        'Produit',
        'Déroulement',
        'Temp. du stérilisateur',
        'Temp. à coeur',
        'Valeur F',
    ]

    # In-memory content (bytes or an uploaded file object) has to be handed
    # to PyMuPDF as a stream; only real paths go through the filename argument.
    if isinstance(pdf_path, (bytes, bytearray)):
        pdf_document = fitz.open(stream=bytes(pdf_path), filetype="pdf")
    elif hasattr(pdf_path, "read"):
        pdf_document = fitz.open(stream=pdf_path.read(), filetype="pdf")
    else:
        pdf_document = fitz.open(pdf_path)

    # Close the document even if text extraction fails on some page.
    try:
        extracted_texts = [page.get_text() for page in pdf_document]
    finally:
        pdf_document.close()

    # One reading line: elapsed-time label ("+ N Min." or "Début"), the two
    # temperatures, then the F value with a decimal comma.
    # "Début" is matched literally; an escaped "\D" would be the regex
    # non-digit class and accept any character in front of "ébut".
    # NOTE(review): the F value only matches two digits on each side of the
    # comma — confirm that covers every value the reports can contain.
    pattern = re.compile(
        r'(\+\s\d+\sMin\.|Début)\s*(\d+°C)\s*(\d+°C)\s*(\d{2},\d{2})'
    )

    structured_data = []
    for page_text in extracted_texts:
        # NOTE(review): the double colon after "Entreprise" is kept as in the
        # original pattern — confirm it matches the PDF layout.
        company_match = re.search(r'Entreprise::\s*(.+)', page_text)
        date_match = re.search(r'Date:\s*(\d{2}[./]\d{2}[./]\d{4})', page_text)
        user_match = re.search(r'Utilisateur:\s*(.+)', page_text)
        product_match = re.search(r'Produit:\s*(.+)', page_text)
        if not all([company_match, date_match, user_match, product_match]):
            continue  # Skip pages without metadata

        matches = pattern.findall(page_text)
        if not matches:
            continue  # Metadata only, no readings on this page

        # Page-level values are identical for every reading: compute once.
        raw_date = date_match.group(1).replace('/', '.').strip()
        iso_date = datetime.strptime(raw_date, '%d.%m.%Y').strftime('%Y-%m-%d')
        company = company_match.group(1).strip()
        user = user_match.group(1).strip()
        product = product_match.group(1).strip()

        for step, sterilizer_temp, core_temp, f_value in matches:
            structured_data.append({
                'Date': iso_date,
                'Entreprise': company,
                'Utilisateur': user,
                'Produit': product,
                'Déroulement': step.strip(),
                'Temp. du stérilisateur': float(sterilizer_temp.replace('°C', '')),
                'Temp. à coeur': float(core_temp.replace('°C', '')),
                'Valeur F': float(f_value.replace(',', '.')),
            })

    # Explicit columns keep the schema stable when nothing was extracted.
    return pd.DataFrame(structured_data, columns=columns)
def analyze_sterilization(data):
    """Summarize, per product, whether the core-temperature criterion is met.

    Args:
        data: ``pandas.DataFrame`` with at least the columns ``Produit``,
            ``Date``, ``Utilisateur``, ``Temp. du stérilisateur`` and
            ``Temp. à coeur``. An empty frame (with or without columns) is
            accepted.

    Returns:
        A ``pandas.DataFrame`` with one row per distinct ``Produit`` and the
        columns ``Date``, ``Produit``, ``Utilisateur``,
        ``Temperature_Requise``, ``Minutes_Temperature_Requise``,
        ``Temperature_Max_Sterilisateur``, ``Temperature_Max_Coeur`` and
        ``Criteres_Respectes``. The columns are present even when there are
        no rows.
    """
    result_columns = [
        'Date',
        'Produit',
        'Utilisateur',
        'Temperature_Requise',
        'Minutes_Temperature_Requise',
        'Temperature_Max_Sterilisateur',
        'Temperature_Max_Coeur',
        'Criteres_Respectes',
    ]

    # A frame built from zero rows may not even have a 'Produit' column, in
    # which case grouping would raise KeyError; return the empty schema.
    if data.empty:
        return pd.DataFrame(columns=result_columns)

    results = []
    # NOTE(review): rows are grouped by product only, so readings of the same
    # product from different dates are merged and only the first date and
    # user are reported — confirm this is intended.
    for product, group in data.groupby('Produit'):
        # Products whose name contains 'NutaBreizh' need 108, all others 103.
        required_temp = 108 if 'NutaBreizh' in product else 103

        # Number of readings at or above the threshold; this is reported as
        # minutes, which presumably assumes one reading per minute — verify.
        core_temps = group['Temp. à coeur']
        minutes_at_temp = int((core_temps >= required_temp).sum())

        results.append({
            'Date': group['Date'].iloc[0],
            'Produit': product,
            'Utilisateur': group['Utilisateur'].iloc[0],
            'Temperature_Requise': required_temp,
            'Minutes_Temperature_Requise': minutes_at_temp,
            'Temperature_Max_Sterilisateur': group['Temp. du stérilisateur'].max(),
            'Temperature_Max_Coeur': core_temps.max(),
            # Criterion: at least 30 readings at the required temperature.
            'Criteres_Respectes': minutes_at_temp >= 30,
        })

    return pd.DataFrame(results, columns=result_columns)
def main():
    """Render the Streamlit page: upload a PDF, analyze it, plot and export.

    The page shows a PDF uploader. Once a file is provided, its readings are
    extracted and summarized per product, the summary table and a scatter
    plot are displayed, and the summary is offered as an Excel download.
    """
    st.title("Analyse des Protocoles de Stérilisation")
    uploaded_file = st.file_uploader("Choisir un fichier PDF", type="pdf")
    if uploaded_file is None:
        return

    # Process data
    data = extract_data_from_pdf(uploaded_file)
    if data.empty:
        # An empty extraction may carry no columns at all, which the analysis
        # step cannot group on; report it instead of failing.
        st.warning("Aucune donnée valide à afficher.")
        return
    results_df = analyze_sterilization(data)

    # Display results
    st.subheader("Résultats de l'analyse")
    st.dataframe(results_df)

    if results_df.empty:
        st.warning("Aucune donnée valide à afficher.")
        return

    # Create visualization
    fig = px.scatter(
        results_df,
        x='Date',
        y='Minutes_Temperature_Requise',
        color='Criteres_Respectes',
        hover_data=['Produit', 'Temperature_Requise'],
        title="Minutes à température requise par production",
    )
    st.plotly_chart(fig)

    # Export: build the workbook in memory and expose it through a single
    # download button. A download button nested inside `if st.button(...)`
    # only exists during the one rerun triggered by that click and then
    # disappears, so it is rendered unconditionally here.
    output = io.BytesIO()
    with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
        results_df.to_excel(writer, index=False)
    st.download_button(
        label="Télécharger l'analyse",
        data=output.getvalue(),
        file_name="analyse_sterilisation.xlsx",
        mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
    )
# Entry point: build the page only when this file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()