File size: 4,719 Bytes
72b45b0
 
 
 
c291e47
72b45b0
 
 
c291e47
 
 
 
 
 
 
 
 
 
 
 
72b45b0
c291e47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72b45b0
 
 
 
c291e47
72b45b0
c291e47
72b45b0
 
 
c291e47
72b45b0
 
c291e47
 
72b45b0
 
 
 
 
c291e47
 
 
72b45b0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c291e47
72b45b0
 
 
 
 
 
 
1ae7d42
 
 
 
 
 
 
 
 
 
72b45b0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import streamlit as st
import pandas as pd
import re
from datetime import datetime
import fitz  # PyMuPDF
import plotly.express as px
import io

def extract_data_from_pdf(pdf_path):
    """Extract sterilization readings from a protocol PDF into a DataFrame.

    Each page is expected to carry four metadata fields (``Entreprise::``,
    ``Date:``, ``Utilisateur:``, ``Produit:``); pages missing any of them are
    skipped. Every line of the page matching the reading pattern (a step label
    such as ``Début`` or ``+ 5 Min.``, two ``<n>°C`` temperatures and an
    ``nn,nn`` F-value) becomes one row.

    Args:
        pdf_path: Either a filesystem path accepted by ``fitz.open``, raw PDF
            bytes, or a binary file-like object (anything exposing
            ``getvalue()`` or ``read()``, e.g. an uploaded file buffer).

    Returns:
        A ``pandas.DataFrame`` with the columns ``Date`` (ISO ``YYYY-MM-DD``
        string), ``Entreprise``, ``Utilisateur``, ``Produit``,
        ``Déroulement``, ``Temp. du stérilisateur``, ``Temp. à coeur`` and
        ``Valeur F``. The columns are present even when no row was found.

    Raises:
        ValueError: If a page with readings has a date that matches the
            ``dd.mm.yyyy`` shape but is not a valid calendar date.
    """
    columns = [
        'Date',
        'Entreprise',
        'Utilisateur',
        'Produit',
        'Déroulement',
        'Temp. du stérilisateur',
        'Temp. à coeur',
        'Valeur F',
    ]

    # In-memory uploads are not paths on disk: hand their bytes to PyMuPDF as
    # a stream. ``getvalue()`` is preferred over ``read()`` because it does
    # not depend on the buffer's current position.
    if isinstance(pdf_path, (bytes, bytearray)):
        pdf_document = fitz.open(stream=bytes(pdf_path), filetype="pdf")
    elif hasattr(pdf_path, "getvalue"):
        pdf_document = fitz.open(stream=pdf_path.getvalue(), filetype="pdf")
    elif hasattr(pdf_path, "read"):
        pdf_document = fitz.open(stream=pdf_path.read(), filetype="pdf")
    else:
        pdf_document = fitz.open(pdf_path)

    # Always release the document, even if text extraction fails on a page.
    try:
        extracted_texts = [page.get_text() for page in pdf_document]
    finally:
        pdf_document.close()

    # Reading line: step label, sterilizer temperature, core temperature,
    # F-value. "Début" is matched literally (a leading backslash would turn
    # the "D" into the regex class "any non-digit").
    pattern = re.compile(r'(\+\s\d+\sMin\.|Début)\s*(\d+°C)\s*(\d+°C)\s*(\d{2},\d{2})')
    structured_data = []

    for page_text in extracted_texts:
        # Page-level metadata shared by every reading on the page.
        company_match = re.search(r'Entreprise::\s*(.+)', page_text)
        date_match = re.search(r'Date:\s*(\d{2}[./]\d{2}[./]\d{4})', page_text)
        user_match = re.search(r'Utilisateur:\s*(.+)', page_text)
        product_match = re.search(r'Produit:\s*(.+)', page_text)

        if not all([company_match, date_match, user_match, product_match]):
            continue  # Skip pages without metadata

        matches = pattern.findall(page_text)
        if not matches:
            continue  # Nothing to record for this page

        company = company_match.group(1).strip()
        user = user_match.group(1).strip()
        product = product_match.group(1).strip()

        # Normalise the separator, then convert once per page to ISO format.
        raw_date = date_match.group(1).replace('/', '.').strip()
        iso_date = datetime.strptime(raw_date, '%d.%m.%Y').strftime('%Y-%m-%d')

        for step, sterilizer_temp, core_temp, f_value in matches:
            structured_data.append({
                'Date': iso_date,
                'Entreprise': company,
                'Utilisateur': user,
                'Produit': product,
                'Déroulement': step.strip(),
                'Temp. du stérilisateur': float(sterilizer_temp.replace('°C', '')),
                'Temp. à coeur': float(core_temp.replace('°C', '')),
                # F-values use a decimal comma in the protocol.
                'Valeur F': float(f_value.replace(',', '.')),
            })

    # Explicit columns keep the schema stable when no reading was extracted.
    return pd.DataFrame(structured_data, columns=columns)

def analyze_sterilization(data):
    """Summarise extracted readings into one verdict per production.

    A production is a (``Date``, ``Produit``) pair. Readings of the same
    product taken on different dates are evaluated separately so that the
    time spent at temperature in one cycle is never added to another cycle.

    For each production the required core temperature is 108 when the product
    name contains ``NutaBreizh`` and 103 otherwise. The number of readings
    whose core temperature reaches that threshold is reported as
    ``Minutes_Temperature_Requise`` (this treats every reading as one minute —
    NOTE(review): confirm the protocol logs one line per minute). The
    criteria are met when that count is at least 30.

    Args:
        data: DataFrame with at least the columns ``Date``, ``Produit``,
            ``Utilisateur``, ``Temp. du stérilisateur`` and ``Temp. à coeur``.
            ``None`` or an empty frame (with or without columns) is accepted.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Date``, ``Produit``,
        ``Utilisateur``, ``Temperature_Requise``,
        ``Minutes_Temperature_Requise``, ``Temperature_Max_Sterilisateur``,
        ``Temperature_Max_Coeur`` and ``Criteres_Respectes``; empty (but with
        those columns) when there is nothing to analyse.
    """
    result_columns = [
        'Date',
        'Produit',
        'Utilisateur',
        'Temperature_Requise',
        'Minutes_Temperature_Requise',
        'Temperature_Max_Sterilisateur',
        'Temperature_Max_Coeur',
        'Criteres_Respectes',
    ]

    # A frame built from zero rows may have no columns at all; grouping it
    # would raise KeyError, so report "no results" instead.
    if data is None or data.empty or 'Produit' not in data.columns:
        return pd.DataFrame(columns=result_columns)

    results = []

    for (date, product), group in data.groupby(['Date', 'Produit']):
        # Product family decides the core temperature that must be held.
        required_temp = 108 if 'NutaBreizh' in product else 103

        core_temps = group['Temp. à coeur']
        minutes_at_temp = int((core_temps >= required_temp).sum())

        results.append({
            'Date': date,
            'Produit': product,
            # First recorded operator of the production.
            'Utilisateur': group['Utilisateur'].iloc[0],
            'Temperature_Requise': required_temp,
            'Minutes_Temperature_Requise': minutes_at_temp,
            'Temperature_Max_Sterilisateur': group['Temp. du stérilisateur'].max(),
            'Temperature_Max_Coeur': core_temps.max(),
            'Criteres_Respectes': minutes_at_temp >= 30,
        })

    return pd.DataFrame(results, columns=result_columns)

def main():
    """Render the Streamlit page: upload a PDF, analyse it, chart and export.

    The uploaded PDF is passed to ``extract_data_from_pdf`` and the extracted
    readings to ``analyze_sterilization``. When nothing usable was extracted a
    warning is shown and the analysis, chart and export are skipped; otherwise
    the result table and a scatter plot are displayed and the results can be
    exported as an Excel workbook.
    """
    st.title("Analyse des Protocoles de Stérilisation")

    uploaded_file = st.file_uploader("Choisir un fichier PDF", type="pdf")
    if uploaded_file is None:
        return

    data = extract_data_from_pdf(uploaded_file)

    # Check before analysing: an extraction without any reading cannot be
    # grouped by product, and there would be nothing to show or export anyway.
    if data.empty:
        st.warning("Aucune donnée valide à afficher.")
        return

    results_df = analyze_sterilization(data)

    st.subheader("Résultats de l'analyse")
    st.dataframe(results_df)

    if results_df.empty:
        st.warning("Aucune donnée valide à afficher.")
        return

    # One point per analysed production, coloured by pass/fail.
    fig = px.scatter(
        results_df,
        x='Date',
        y='Minutes_Temperature_Requise',
        color='Criteres_Respectes',
        hover_data=['Produit', 'Temperature_Requise'],
        title="Minutes à température requise par production",
    )
    st.plotly_chart(fig)

    # Two-step export: the workbook is only built once the user asks for it.
    if st.button("Exporter en Excel"):
        output = io.BytesIO()
        with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
            results_df.to_excel(writer, index=False)

        # Rewind so the download starts at the beginning of the workbook.
        output.seek(0)
        st.download_button(
            label="Télécharger l'analyse",
            data=output,
            file_name="analyse_sterilisation.xlsx",
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        )

# Start the Streamlit page only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()