Spaces:
Sleeping
Sleeping
File size: 4,719 Bytes
72b45b0 c291e47 72b45b0 c291e47 72b45b0 c291e47 72b45b0 c291e47 72b45b0 c291e47 72b45b0 c291e47 72b45b0 c291e47 72b45b0 c291e47 72b45b0 c291e47 72b45b0 1ae7d42 72b45b0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 |
import streamlit as st
import pandas as pd
import re
from datetime import datetime
import fitz # PyMuPDF
import plotly.express as px
import io
# Column order of the frame returned by extract_data_from_pdf. Passing it
# explicitly keeps the schema stable even when no measurement row is found.
_MEASUREMENT_COLUMNS = [
    'Date',
    'Entreprise',
    'Utilisateur',
    'Produit',
    'Déroulement',
    'Temp. du stérilisateur',
    'Temp. à coeur',
    'Valeur F',
]

# One measurement row: a step label ("Début" or "+ N Min."), two integer
# temperatures suffixed with "°C", and a value written with a decimal comma.
# The label used to be spelled r'\Début', where \D is the "any non-digit"
# class; the explicit [Dd] keeps both capitalisations without matching
# arbitrary characters.
_MEASUREMENT_PATTERN = re.compile(
    r'(\+\s\d+\sMin\.|[Dd]ébut)\s*(\d+°C)\s*(\d+°C)\s*(\d{2},\d{2})'
)

# Per-page header fields; a page is only used when all four are present.
_METADATA_PATTERNS = {
    'Entreprise': re.compile(r'Entreprise::\s*(.+)'),
    'Date': re.compile(r'Date:\s*(\d{2}[./]\d{2}[./]\d{4})'),
    'Utilisateur': re.compile(r'Utilisateur:\s*(.+)'),
    'Produit': re.compile(r'Produit:\s*(.+)'),
}


def _open_pdf_document(pdf_source):
    """Open a PDF with PyMuPDF from a path, raw bytes, or a file-like object."""
    if isinstance(pdf_source, (bytes, bytearray)):
        return fitz.open(stream=bytes(pdf_source), filetype="pdf")
    if hasattr(pdf_source, "getvalue"):
        # Preferred for in-memory uploads: does not depend on, or move, the
        # current read position of the buffer.
        return fitz.open(stream=pdf_source.getvalue(), filetype="pdf")
    if hasattr(pdf_source, "read"):
        return fitz.open(stream=pdf_source.read(), filetype="pdf")
    return fitz.open(pdf_source)


def _parse_page_text(page_text):
    """Return the measurement rows of one page, or [] if it has no usable data."""
    found = {
        field: pattern.search(page_text)
        for field, pattern in _METADATA_PATTERNS.items()
    }
    if not all(found.values()):
        return []  # Skip pages without complete metadata

    measurements = _MEASUREMENT_PATTERN.findall(page_text)
    if not measurements:
        return []

    company = found['Entreprise'].group(1).strip()
    user = found['Utilisateur'].group(1).strip()
    product = found['Produit'].group(1).strip()

    # Accept both "dd.mm.yyyy" and "dd/mm/yyyy"; convert once per page instead
    # of once per measurement row.
    raw_date = found['Date'].group(1).replace('/', '.').strip()
    iso_date = datetime.strptime(raw_date, '%d.%m.%Y').strftime('%Y-%m-%d')

    return [
        {
            'Date': iso_date,
            'Entreprise': company,
            'Utilisateur': user,
            'Produit': product,
            'Déroulement': step.strip(),
            'Temp. du stérilisateur': float(sterilizer_temp.replace('°C', '')),
            'Temp. à coeur': float(core_temp.replace('°C', '')),
            'Valeur F': float(f_value.replace(',', '.')),
        }
        for step, sterilizer_temp, core_temp, f_value in measurements
    ]


def extract_data_from_pdf(pdf_path):
    """Extract sterilization measurements from a PDF into a DataFrame.

    Each page is read as text. Pages lacking any of the "Entreprise::",
    "Date:", "Utilisateur:" or "Produit:" header fields are skipped. Every
    measurement row found on a remaining page becomes one DataFrame row
    carrying that page's metadata.

    Args:
        pdf_path: A filesystem path, raw PDF bytes, or a file-like object
            exposing ``getvalue()`` or ``read()`` (for example an in-memory
            upload). Non-path inputs are handed to PyMuPDF as a byte stream.

    Returns:
        A pandas DataFrame with the columns 'Date' (ISO ``YYYY-MM-DD``
        string), 'Entreprise', 'Utilisateur', 'Produit', 'Déroulement',
        'Temp. du stérilisateur', 'Temp. à coeur' and 'Valeur F' (the last
        three as floats). The columns are present even when no row is found.

    Raises:
        ValueError: If a page with measurements carries a date that matches
            the expected pattern but is not a valid calendar date.
    """
    pdf_document = _open_pdf_document(pdf_path)
    try:
        page_texts = [page.get_text() for page in pdf_document]
    finally:
        # Release the document even if text extraction fails on some page.
        pdf_document.close()

    structured_data = []
    for page_text in page_texts:
        structured_data.extend(_parse_page_text(page_text))

    return pd.DataFrame(structured_data, columns=_MEASUREMENT_COLUMNS)
# Column order of the summary frame returned by analyze_sterilization.
_RESULT_COLUMNS = [
    'Date',
    'Produit',
    'Utilisateur',
    'Temperature_Requise',
    'Minutes_Temperature_Requise',
    'Temperature_Max_Sterilisateur',
    'Temperature_Max_Coeur',
    'Criteres_Respectes',
]


def analyze_sterilization(data, *, required_minutes=30):
    """Summarise sterilization measurements per production date and product.

    Args:
        data: DataFrame with at least the columns 'Date', 'Produit',
            'Utilisateur', 'Temp. du stérilisateur' and 'Temp. à coeur'.
            ``None`` or an empty frame (even one without columns) is accepted.
        required_minutes: Minimum number of measurement rows at or above the
            required core temperature for the criteria to count as met.

    Returns:
        A DataFrame with one row per (Date, Produit) pair and the columns
        'Date', 'Produit', 'Utilisateur', 'Temperature_Requise',
        'Minutes_Temperature_Requise', 'Temperature_Max_Sterilisateur',
        'Temperature_Max_Coeur' and 'Criteres_Respectes'. The columns are
        present even when there is nothing to analyse.
    """
    if data is None or data.empty:
        # Grouping a frame without a 'Produit' column would raise KeyError.
        return pd.DataFrame(columns=_RESULT_COLUMNS)

    results = []
    # Group by date as well as product: grouping by product alone adds up the
    # rows of separate runs of the same product, which can wrongly satisfy the
    # duration criterion while only the first run's date is reported.
    for (run_date, product), group in data.groupby(['Date', 'Produit']):
        # Products whose name contains 'NutaBreizh' need a higher core temperature.
        required_temp = 108 if 'NutaBreizh' in product else 103

        core_temps = group['Temp. à coeur']
        # Each row at or above the threshold is counted as one minute.
        # NOTE(review): assumes one measurement row per minute — confirm
        # against the logger's sampling interval.
        minutes_at_temp = int((core_temps >= required_temp).sum())

        results.append({
            'Date': run_date,
            'Produit': product,
            # Several users within one group are not distinguished; the
            # first one encountered is reported.
            'Utilisateur': group['Utilisateur'].iloc[0],
            'Temperature_Requise': required_temp,
            'Minutes_Temperature_Requise': minutes_at_temp,
            'Temperature_Max_Sterilisateur': group['Temp. du stérilisateur'].max(),
            'Temperature_Max_Coeur': core_temps.max(),
            'Criteres_Respectes': minutes_at_temp >= required_minutes,
        })

    return pd.DataFrame(results, columns=_RESULT_COLUMNS)
def main():
    """Render the Streamlit page: upload a PDF, analyse it, chart and export.

    Shows a file uploader; once a PDF is provided, the measurements are
    extracted and summarised, the summary is displayed as a table and a
    scatter plot, and an Excel export is offered. When nothing usable can be
    extracted, a warning is shown instead of running the analysis.
    """
    st.title("Analyse des Protocoles de Stérilisation")
    uploaded_file = st.file_uploader("Choisir un fichier PDF", type="pdf")
    if uploaded_file is None:
        return

    # Process data
    measurements = extract_data_from_pdf(uploaded_file)
    if measurements.empty:
        # Nothing was extracted: stop here rather than running the analysis
        # on a frame that may lack the columns it groups by.
        st.warning("Aucune donnée valide à afficher.")
        return
    results_df = analyze_sterilization(measurements)

    # Display results
    st.subheader("Résultats de l'analyse")
    st.dataframe(results_df)

    # Create visualization
    if results_df.empty:
        st.warning("Aucune donnée valide à afficher.")
    else:
        fig = px.scatter(
            results_df,
            x='Date',
            y='Minutes_Temperature_Requise',
            color='Criteres_Respectes',
            hover_data=['Produit', 'Temperature_Requise'],
            title="Minutes à température requise par production",
        )
        st.plotly_chart(fig)

    # Export button: the workbook is built in memory only when requested.
    if st.button("Exporter en Excel"):
        output = io.BytesIO()
        with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
            results_df.to_excel(writer, index=False)
        # Rewind so the download starts at the beginning of the workbook.
        output.seek(0)
        st.download_button(
            label="Télécharger l'analyse",
            data=output,
            file_name="analyse_sterilisation.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        )
# Script entry point: build the page only when the file is run directly.
if __name__ == "__main__":
    main()