Spaces: Runtime error
cheikhdeme committed
Commit ec35913 • Parent(s): 1994054
Upload folder using huggingface_hub
- .gitattributes +2 -0
- .gradio/certificate.pem +31 -0
- .gradio/flagged/Télécharger un fichier exécutable/8f4fec8239b0bcbd58df/jre-8u271-windows-x64.exe +3 -0
- .gradio/flagged/Télécharger un fichier exécutable/be2fac359432906d185e/jre-8u271-windows-x64.exe +3 -0
- .gradio/flagged/dataset1.csv +3 -0
- .ipynb_checkpoints/Untitled-checkpoint.ipynb +179 -0
- README.md +2 -8
- Untitled.ipynb +170 -0
- __pycache__/gradio.cpython-312.pyc +0 -0
- dl1.py +64 -0
- dl2.py +108 -0
- dl3.py +133 -0
- dl4.py +92 -0
- random_forest_model.pkl +3 -0
- test.py +77 -0
.gitattributes
CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+.gradio/flagged/Télécharger[[:space:]]un[[:space:]]fichier[[:space:]]exécutable/8f4fec8239b0bcbd58df/jre-8u271-windows-x64.exe filter=lfs diff=lfs merge=lfs -text
+.gradio/flagged/Télécharger[[:space:]]un[[:space:]]fichier[[:space:]]exécutable/be2fac359432906d185e/jre-8u271-windows-x64.exe filter=lfs diff=lfs merge=lfs -text
.gradio/certificate.pem
ADDED
@@ -0,0 +1,31 @@
+-----BEGIN CERTIFICATE-----
+MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
+TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
+cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
+WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
+ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
+MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
+h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
+0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
+A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
+T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
+B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
+B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
+KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
+OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
+jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
+qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
+rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
+HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
+hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
+ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
+3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
+NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
+ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
+TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
+jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
+oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
+4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
+mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
+emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
+-----END CERTIFICATE-----
.gradio/flagged/Télécharger un fichier exécutable/8f4fec8239b0bcbd58df/jre-8u271-windows-x64.exe
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6210a4cdfc5c67d34027224dfadf48798bf3508e5db6ef268bb93f0fb7d697d5
+size 83364488
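The three lines above are a Git LFS pointer: the repository stores only this stub while the ~83 MB binary lives in LFS storage, and the oid is the SHA-256 of the real file. A minimal sketch of verifying a local copy against the pointer (the local path is hypothetical):

import hashlib

def lfs_oid(path, chunk_size=4096):
    # Compute the SHA-256 digest that Git LFS records as the pointer's oid.
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for block in iter(lambda: f.read(chunk_size), b""):
            digest.update(block)
    return digest.hexdigest()

# Hypothetical local copy of the flagged upload; should equal
# "6210a4cdfc5c67d34027224dfadf48798bf3508e5db6ef268bb93f0fb7d697d5"
# print(lfs_oid("jre-8u271-windows-x64.exe"))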
.gradio/flagged/Télécharger un fichier exécutable/be2fac359432906d185e/jre-8u271-windows-x64.exe
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6210a4cdfc5c67d34027224dfadf48798bf3508e5db6ef268bb93f0fb7d697d5
+size 83364488
.gradio/flagged/dataset1.csv
ADDED
@@ -0,0 +1,3 @@
+Télécharger un fichier exécutable,output,timestamp
+.gradio/flagged/Télécharger un fichier exécutable/be2fac359432906d185e/jre-8u271-windows-x64.exe,🚨 MALWARE (Probabilité: 85.70%),2024-12-17 19:35:57.886915
+.gradio/flagged/Télécharger un fichier exécutable/8f4fec8239b0bcbd58df/jre-8u271-windows-x64.exe,🚨 MALWARE (Probabilité: 85.70%),2024-12-17 19:44:23.100754
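This CSV is the log written by Gradio's flagging callback: one row per flagged prediction, holding the stored input path, the model output, and a timestamp. A quick way to inspect it (a sketch, assuming pandas is installed):

import pandas as pd

flagged = pd.read_csv(".gradio/flagged/dataset1.csv")
# Both rows record the same 85.70% MALWARE verdict for the two stored copies.
print(flagged[["output", "timestamp"]])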
.ipynb_checkpoints/Untitled-checkpoint.ipynb
ADDED
@@ -0,0 +1,179 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "b156c93b-7114-4401-8956-0bbdf3f55819",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/cheikh/anaconda3/lib/python3.12/site-packages/gradio/blocks.py:1049: UserWarning: Cannot load huggingface. Caught Exception: 404 Client Error: Not Found for url: https://huggingface.co/api/spaces/huggingface (Request ID: Root=1-6761d652-5bc4d5a26e798b4156071116;691ae8e4-ee45-43b8-8d96-de80ab472888)\n",
+      "\n",
+      "Sorry, we can't find the page you are looking for.\n",
+      "  warnings.warn(f\"Cannot load {theme}. Caught Exception: {str(e)}\")\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "* Running on local URL: http://127.0.0.1:7861\n",
+      "* Running on public URL: https://9cd0ff2c927f533d29.gradio.live\n",
+      "\n",
+      "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div><iframe src=\"https://9cd0ff2c927f533d29.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "\n",
+    "import os\n",
+    "import joblib\n",
+    "import pefile\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import gradio as gr\n",
+    "import hashlib\n",
+    "\n",
+    "\n",
+    "# Load the pre-trained model\n",
+    "try:\n",
+    "    model = joblib.load('random_forest_model.pkl')\n",
+    "except Exception as e:\n",
+    "    print(f\"Erreur de chargement du modèle : {e}\")\n",
+    "    model = None\n",
+    "\n",
+    "def calculate_file_hash(file_path):\n",
+    "    \"\"\"Compute the SHA-256 hash of the file.\"\"\"\n",
+    "    sha256_hash = hashlib.sha256()\n",
+    "    with open(file_path, \"rb\") as f:\n",
+    "        for byte_block in iter(lambda: f.read(4096), b\"\"):\n",
+    "            sha256_hash.update(byte_block)\n",
+    "    return sha256_hash.hexdigest()\n",
+    "\n",
+    "def extract_pe_attributes(file_path):\n",
+    "    \"\"\"Advanced extraction of PE file attributes.\"\"\"\n",
+    "    try:\n",
+    "        pe = pefile.PE(file_path)\n",
+    "\n",
+    "        attributes = {\n",
+    "            # Standard PE attributes\n",
+    "            'AddressOfEntryPoint': pe.OPTIONAL_HEADER.AddressOfEntryPoint,\n",
+    "            'MajorLinkerVersion': pe.OPTIONAL_HEADER.MajorLinkerVersion,\n",
+    "            'MajorImageVersion': pe.OPTIONAL_HEADER.MajorImageVersion,\n",
+    "            'MajorOperatingSystemVersion': pe.OPTIONAL_HEADER.MajorOperatingSystemVersion,\n",
+    "            'DllCharacteristics': pe.OPTIONAL_HEADER.DllCharacteristics,\n",
+    "            'SizeOfStackReserve': pe.OPTIONAL_HEADER.SizeOfStackReserve,\n",
+    "            'NumberOfSections': pe.FILE_HEADER.NumberOfSections,\n",
+    "            'ResourceSize': pe.OPTIONAL_HEADER.DATA_DIRECTORY[2].Size\n",
+    "        }\n",
+    "        \"\"\"## Resources\n",
+    "        data_directory_entries = pe.OPTIONAL_HEADER.DATA_DIRECTORY\n",
+    "        # Walk the list to find the resource directory entry\n",
+    "        for entry in data_directory_entries:\n",
+    "            if entry.name == \"IMAGE_DIRECTORY_ENTRY_RESOURCE\":\n",
+    "                resource_size = entry.Size\n",
+    "                attributes['ResourceSize'] = resource_size\n",
+    "                break\n",
+    "        else:\n",
+    "            attributes['ResourceSize'] = 0\"\"\"\n",
+    "\n",
+    "\n",
+    "\n",
+    "        return attributes\n",
+    "    except Exception as e:\n",
+    "        print(f\"Erreur de traitement du fichier {file_path}: {str(e)}\")\n",
+    "        return f\"Erreur de traitement du fichier {file_path}: {str(e)}\"\n",
+    "\n",
+    "def predict_malware(file):\n",
+    "    \"\"\"Malware prediction with error handling.\"\"\"\n",
+    "    if model is None:\n",
+    "        return \"Erreur : Modèle non chargé\"\n",
+    "\n",
+    "    try:\n",
+    "        # Extract the file's attributes\n",
+    "        attributes = extract_pe_attributes(file.name)\n",
+    "        if \"Erreur\" in attributes:\n",
+    "            return attributes\n",
+    "\n",
+    "        # Convert to a DataFrame\n",
+    "        df = pd.DataFrame([attributes])\n",
+    "\n",
+    "        # Prediction\n",
+    "        prediction = model.predict(df)\n",
+    "        proba = model.predict_proba(df)[0]\n",
+    "\n",
+    "        # Result with probability\n",
+    "        if prediction[0] == 1:\n",
+    "            return f\"🚨 MALWARE (Probabilité: {proba[1] * 100:.2f}%)\"\n",
+    "        else:\n",
+    "            return f\"✅ Fichier Légitime (Probabilité: {proba[0] * 100:.2f}%)\"\n",
+    "    except Exception as e:\n",
+    "        return f\"Erreur d'analyse : {str(e)}\"\n",
+    "\n",
+    "# Gradio interface\n",
+    "demo = gr.Interface(\n",
+    "    fn=predict_malware,\n",
+    "    inputs=gr.File(file_types=['.exe', '.dll', '.sys'], label=\"Télécharger un fichier exécutable\"),\n",
+    "    outputs=\"text\",\n",
+    "    title=\"🛡️ Détecteur de Malwares\",\n",
+    "    theme='huggingface'  # modern theme\n",
+    ")\n",
+    "\n",
+    "if __name__ == \"__main__\":\n",
+    "    demo.launch(share=True)  # makes the interface publicly accessible\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5f87e13b-157d-4105-865f-daa2919c2711",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c23ce0c3-ac81-438b-a8b8-1264ac99dd12",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
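The stderr block in the cell output shows why theme='huggingface' warns: Gradio treats the string as the name of a theme hosted on the Hub, the lookup 404s, and it falls back to the default theme. A sketch of the same interface using one of Gradio's built-in theme objects instead (assuming gr.themes.Soft is available, as it is in recent Gradio releases):

import gradio as gr

demo = gr.Interface(
    fn=predict_malware,  # defined in the notebook cell above
    inputs=gr.File(file_types=['.exe', '.dll', '.sys'],
                   label="Télécharger un fichier exécutable"),
    outputs="text",
    title="🛡️ Détecteur de Malwares",
    theme=gr.themes.Soft(),  # built-in theme object; avoids the Hub lookup
)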
README.md
CHANGED
@@ -1,12 +1,6 @@
 ---
-title:
-
-colorFrom: purple
-colorTo: green
+title: malware
+app_file: Untitled.ipynb
 sdk: gradio
 sdk_version: 5.9.1
-app_file: app.py
-pinned: false
 ---
-
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
Untitled.ipynb
ADDED
@@ -0,0 +1,170 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "b156c93b-7114-4401-8956-0bbdf3f55819",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/cheikh/anaconda3/lib/python3.12/site-packages/sklearn/base.py:376: InconsistentVersionWarning: Trying to unpickle estimator DecisionTreeClassifier from version 1.5.2 when using version 1.4.2. This might lead to breaking code or invalid results. Use at your own risk. For more info please refer to:\n",
+      "https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations\n",
+      "  warnings.warn(\n",
+      "/home/cheikh/anaconda3/lib/python3.12/site-packages/sklearn/base.py:376: InconsistentVersionWarning: Trying to unpickle estimator RandomForestClassifier from version 1.5.2 when using version 1.4.2. This might lead to breaking code or invalid results. Use at your own risk. For more info please refer to:\n",
+      "https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations\n",
+      "  warnings.warn(\n",
+      "/home/cheikh/anaconda3/lib/python3.12/site-packages/gradio/blocks.py:1049: UserWarning: Cannot load huggingface. Caught Exception: 404 Client Error: Not Found for url: https://huggingface.co/api/spaces/huggingface (Request ID: Root=1-6761d6db-0c06b74870454450704094b9;d4cdbbda-a206-4969-bdc5-e2685d9d5157)\n",
+      "\n",
+      "Sorry, we can't find the page you are looking for.\n",
+      "  warnings.warn(f\"Cannot load {theme}. Caught Exception: {str(e)}\")\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "* Running on local URL: http://127.0.0.1:7862\n",
+      "* Running on public URL: https://3202cd86a5db7b27c9.gradio.live\n",
+      "\n",
+      "This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div><iframe src=\"https://3202cd86a5db7b27c9.gradio.live\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
+      ],
+      "text/plain": [
+       "<IPython.core.display.HTML object>"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "\n",
+    "import os\n",
+    "import joblib\n",
+    "import pefile\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import gradio as gr\n",
+    "import hashlib\n",
+    "\n",
+    "\n",
+    "# Load the pre-trained model\n",
+    "try:\n",
+    "    model = joblib.load('random_forest_model.pkl')\n",
+    "except Exception as e:\n",
+    "    print(f\"Erreur de chargement du modèle : {e}\")\n",
+    "    model = None\n",
+    "\n",
+    "def calculate_file_hash(file_path):\n",
+    "    \"\"\"Compute the SHA-256 hash of the file.\"\"\"\n",
+    "    sha256_hash = hashlib.sha256()\n",
+    "    with open(file_path, \"rb\") as f:\n",
+    "        for byte_block in iter(lambda: f.read(4096), b\"\"):\n",
+    "            sha256_hash.update(byte_block)\n",
+    "    return sha256_hash.hexdigest()\n",
+    "\n",
+    "def extract_pe_attributes(file_path):\n",
+    "    \"\"\"Advanced extraction of PE file attributes.\"\"\"\n",
+    "    try:\n",
+    "        pe = pefile.PE(file_path)\n",
+    "\n",
+    "        attributes = {\n",
+    "            # Standard PE attributes\n",
+    "            'AddressOfEntryPoint': pe.OPTIONAL_HEADER.AddressOfEntryPoint,\n",
+    "            'MajorLinkerVersion': pe.OPTIONAL_HEADER.MajorLinkerVersion,\n",
+    "            'MajorImageVersion': pe.OPTIONAL_HEADER.MajorImageVersion,\n",
+    "            'MajorOperatingSystemVersion': pe.OPTIONAL_HEADER.MajorOperatingSystemVersion,\n",
+    "            'DllCharacteristics': pe.OPTIONAL_HEADER.DllCharacteristics,\n",
+    "            'SizeOfStackReserve': pe.OPTIONAL_HEADER.SizeOfStackReserve,\n",
+    "            'NumberOfSections': pe.FILE_HEADER.NumberOfSections,\n",
+    "            'ResourceSize': pe.OPTIONAL_HEADER.DATA_DIRECTORY[2].Size\n",
+    "        }\n",
+    "\n",
+    "        \"\"\"## Resources\n",
+    "        data_directory_entries = pe.OPTIONAL_HEADER.DATA_DIRECTORY\n",
+    "        # Walk the list to find the resource directory entry\n",
+    "        for entry in data_directory_entries:\n",
+    "            if entry.name == \"IMAGE_DIRECTORY_ENTRY_RESOURCE\":\n",
+    "                resource_size = entry.Size\n",
+    "                attributes['ResourceSize'] = resource_size\n",
+    "                break\n",
+    "        else:\n",
+    "            attributes['ResourceSize'] = 0\"\"\"\n",
+    "\n",
+    "\n",
+    "\n",
+    "        return attributes\n",
+    "    except Exception as e:\n",
+    "        print(f\"Erreur de traitement du fichier {file_path}: {str(e)}\")\n",
+    "        return f\"Erreur de traitement du fichier {file_path}: {str(e)}\"\n",
+    "\n",
+    "def predict_malware(file):\n",
+    "    \"\"\"Malware prediction with error handling.\"\"\"\n",
+    "    if model is None:\n",
+    "        return \"Erreur : Modèle non chargé\"\n",
+    "\n",
+    "    try:\n",
+    "        # Extract the file's attributes\n",
+    "        attributes = extract_pe_attributes(file.name)\n",
+    "        if \"Erreur\" in attributes:\n",
+    "            return attributes\n",
+    "\n",
+    "        # Convert to a DataFrame\n",
+    "        df = pd.DataFrame([attributes])\n",
+    "\n",
+    "        # Prediction\n",
+    "        prediction = model.predict(df)\n",
+    "        proba = model.predict_proba(df)[0]\n",
+    "\n",
+    "        # Result with probability\n",
+    "        if prediction[0] == 1:\n",
+    "            return f\"🚨 MALWARE (Probabilité: {proba[1] * 100:.2f}%)\"\n",
+    "        else:\n",
+    "            return f\"✅ Fichier Légitime (Probabilité: {proba[0] * 100:.2f}%)\"\n",
+    "    except Exception as e:\n",
+    "        return f\"Erreur d'analyse : {str(e)}\"\n",
+    "\n",
+    "# Gradio interface\n",
+    "demo = gr.Interface(\n",
+    "    fn=predict_malware,\n",
+    "    inputs=gr.File(file_types=['.exe', '.dll', '.sys'], label=\"Télécharger un fichier exécutable\"),\n",
+    "    outputs=\"text\",\n",
+    "    title=\"🛡️ Détecteur de Malwares\",\n",
+    "    theme='huggingface'  # modern theme\n",
+    ")\n",
+    "\n",
+    "if __name__ == \"__main__\":\n",
+    "    demo.launch(share=True)  # makes the interface publicly accessible\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
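The InconsistentVersionWarning lines in this cell's stderr come from unpickling a model trained with scikit-learn 1.5.2 under 1.4.2. A defensive sketch that surfaces the mismatch explicitly at load time (the 1.5.2 value is taken from the warning text above):

import joblib
import sklearn

TRAINED_WITH = "1.5.2"  # version reported in the InconsistentVersionWarning

if sklearn.__version__ != TRAINED_WITH:
    print(f"Model was pickled with scikit-learn {TRAINED_WITH} "
          f"but {sklearn.__version__} is installed; results may differ.")

model = joblib.load("random_forest_model.pkl")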
__pycache__/gradio.cpython-312.pyc
ADDED
Binary file (4.33 kB)
dl1.py
ADDED
@@ -0,0 +1,64 @@
+import numpy as np
+import pandas as pd
+from sklearn.preprocessing import StandardScaler, OneHotEncoder
+from sklearn.compose import make_column_transformer, make_column_selector
+from sklearn.model_selection import train_test_split
+
+fuel = pd.read_csv('../input/dl-course-data/fuel.csv')
+
+X = fuel.copy()
+# Remove target
+y = X.pop('FE')
+
+preprocessor = make_column_transformer(
+    (StandardScaler(),
+     make_column_selector(dtype_include=np.number)),
+    (OneHotEncoder(sparse=False),  # scikit-learn >= 1.2 renames this to sparse_output
+     make_column_selector(dtype_include=object)),
+)
+
+X = preprocessor.fit_transform(X)
+y = np.log(y)  # log transform target instead of standardizing
+
+input_shape = [X.shape[1]]
+print("Input shape: {}".format(input_shape))
+
+from tensorflow import keras
+from tensorflow.keras import layers
+
+model = keras.Sequential([
+    layers.Dense(128, activation='relu', input_shape=input_shape),
+    layers.Dense(128, activation='relu'),
+    layers.Dense(64, activation='relu'),
+    layers.Dense(1),
+])
+model.compile(
+    optimizer='adam',
+    loss='mae',
+)
+history = model.fit(
+    X, y,
+    batch_size=128,
+    epochs=200,
+)
+
+import pandas as pd
+
+history_df = pd.DataFrame(history.history)
+# Start the plot at epoch 5. You can change this to get a different view.
+history_df.loc[5:, ['loss']].plot();
+
+# YOUR CODE HERE: Experiment with different values for the learning rate, batch size, and number of examples
+learning_rate = 0.05
+batch_size = 32
+num_examples = 256
+
+animate_sgd(  # helper provided by the course notebook environment, not defined in this file
+    learning_rate=learning_rate,
+    batch_size=batch_size,
+    num_examples=num_examples,
+    # You can also change these, if you like
+    steps=50,  # total training steps (batches seen)
+    true_w=3.0,  # the slope of the data
+    true_b=2.0,  # the bias of the data
+)
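OneHotEncoder(sparse=False) uses a keyword that scikit-learn deprecated in 1.2 and removed in 1.4, so this script fails under the 1.4.2 environment seen in the notebook warnings. A sketch of just the preprocessor with the renamed parameter:

import numpy as np
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import make_column_transformer, make_column_selector

preprocessor = make_column_transformer(
    (StandardScaler(), make_column_selector(dtype_include=np.number)),
    # sparse_output replaces the removed `sparse` keyword (scikit-learn >= 1.2)
    (OneHotEncoder(sparse_output=False), make_column_selector(dtype_include=object)),
)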
dl2.py
ADDED
@@ -0,0 +1,108 @@
+import pandas as pd
+from sklearn.preprocessing import StandardScaler, OneHotEncoder
+from sklearn.compose import make_column_transformer
+from sklearn.model_selection import GroupShuffleSplit
+
+from tensorflow import keras
+from tensorflow.keras import layers
+from tensorflow.keras import callbacks
+
+spotify = pd.read_csv('../input/dl-course-data/spotify.csv')
+
+X = spotify.copy().dropna()
+y = X.pop('track_popularity')
+artists = X['track_artist']
+
+features_num = ['danceability', 'energy', 'key', 'loudness', 'mode',
+                'speechiness', 'acousticness', 'instrumentalness',
+                'liveness', 'valence', 'tempo', 'duration_ms']
+features_cat = ['playlist_genre']
+
+preprocessor = make_column_transformer(
+    (StandardScaler(), features_num),
+    (OneHotEncoder(), features_cat),
+)
+
+# We'll do a "grouped" split to keep all of an artist's songs in one
+# split or the other. This is to help prevent signal leakage.
+def group_split(X, y, group, train_size=0.75):
+    splitter = GroupShuffleSplit(train_size=train_size)
+    train, test = next(splitter.split(X, y, groups=group))
+    return (X.iloc[train], X.iloc[test], y.iloc[train], y.iloc[test])
+
+X_train, X_valid, y_train, y_valid = group_split(X, y, artists)
+
+X_train = preprocessor.fit_transform(X_train)
+X_valid = preprocessor.transform(X_valid)
+y_train = y_train / 100  # popularity is on a scale 0-100, so this rescales to 0-1.
+y_valid = y_valid / 100
+
+input_shape = [X_train.shape[1]]
+print("Input shape: {}".format(input_shape))
+
+
+model = keras.Sequential([
+    layers.Dense(1, input_shape=input_shape),
+])
+model.compile(
+    optimizer='adam',
+    loss='mae',
+)
+history = model.fit(
+    X_train, y_train,
+    validation_data=(X_valid, y_valid),
+    batch_size=512,
+    epochs=50,
+    verbose=0,  # suppress output since we'll plot the curves
+)
+history_df = pd.DataFrame(history.history)
+history_df.loc[0:, ['loss', 'val_loss']].plot()
+print("Minimum Validation Loss: {:0.4f}".format(history_df['val_loss'].min()));
+
+
+model = keras.Sequential([
+    layers.Dense(128, activation='relu', input_shape=input_shape),
+    layers.Dense(64, activation='relu'),
+    layers.Dense(1)
+])
+model.compile(
+    optimizer='adam',
+    loss='mae',
+)
+history = model.fit(
+    X_train, y_train,
+    validation_data=(X_valid, y_valid),
+    batch_size=512,
+    epochs=50,
+)
+history_df = pd.DataFrame(history.history)
+history_df.loc[:, ['loss', 'val_loss']].plot()
+print("Minimum Validation Loss: {:0.4f}".format(history_df['val_loss'].min()));
+
+
+early_stopping = callbacks.EarlyStopping(
+    patience=5,
+    min_delta=0.001,
+    restore_best_weights=True,
+)
+
+
+model = keras.Sequential([
+    layers.Dense(128, activation='relu', input_shape=input_shape),
+    layers.Dense(64, activation='relu'),
+    layers.Dense(1)
+])
+model.compile(
+    optimizer='adam',
+    loss='mae',
+)
+history = model.fit(
+    X_train, y_train,
+    validation_data=(X_valid, y_valid),
+    batch_size=512,
+    epochs=50,
+    callbacks=[early_stopping]
+)
+history_df = pd.DataFrame(history.history)
+history_df.loc[:, ['loss', 'val_loss']].plot()
+print("Minimum Validation Loss: {:0.4f}".format(history_df['val_loss'].min()));
dl3.py
ADDED
@@ -0,0 +1,133 @@
+import pandas as pd
+from sklearn.preprocessing import StandardScaler, OneHotEncoder
+from sklearn.compose import make_column_transformer
+from sklearn.model_selection import GroupShuffleSplit
+
+from tensorflow import keras
+from tensorflow.keras import layers
+from tensorflow.keras import callbacks
+
+spotify = pd.read_csv('../input/dl-course-data/spotify.csv')
+
+X = spotify.copy().dropna()
+y = X.pop('track_popularity')
+artists = X['track_artist']
+
+features_num = ['danceability', 'energy', 'key', 'loudness', 'mode',
+                'speechiness', 'acousticness', 'instrumentalness',
+                'liveness', 'valence', 'tempo', 'duration_ms']
+features_cat = ['playlist_genre']
+
+preprocessor = make_column_transformer(
+    (StandardScaler(), features_num),
+    (OneHotEncoder(), features_cat),
+)
+
+def group_split(X, y, group, train_size=0.75):
+    splitter = GroupShuffleSplit(train_size=train_size)
+    train, test = next(splitter.split(X, y, groups=group))
+    return (X.iloc[train], X.iloc[test], y.iloc[train], y.iloc[test])
+
+X_train, X_valid, y_train, y_valid = group_split(X, y, artists)
+
+X_train = preprocessor.fit_transform(X_train)
+X_valid = preprocessor.transform(X_valid)
+y_train = y_train / 100
+y_valid = y_valid / 100
+
+input_shape = [X_train.shape[1]]
+print("Input shape: {}".format(input_shape))
+
+model = keras.Sequential([
+    layers.Dense(128, activation='relu', input_shape=input_shape),
+    layers.Dropout(0.3),
+    layers.Dense(64, activation='relu'),
+    layers.Dropout(0.3),
+    layers.Dense(1)
+])
+
+model.compile(
+    optimizer='adam',
+    loss='mae',
+)
+history = model.fit(
+    X_train, y_train,
+    validation_data=(X_valid, y_valid),
+    batch_size=512,
+    epochs=50,
+    verbose=0,
+)
+history_df = pd.DataFrame(history.history)
+history_df.loc[:, ['loss', 'val_loss']].plot()
+print("Minimum Validation Loss: {:0.4f}".format(history_df['val_loss'].min()))
+
+
+import pandas as pd
+
+concrete = pd.read_csv('../input/dl-course-data/concrete.csv')
+df = concrete.copy()
+
+df_train = df.sample(frac=0.7, random_state=0)
+df_valid = df.drop(df_train.index)
+
+X_train = df_train.drop('CompressiveStrength', axis=1)
+X_valid = df_valid.drop('CompressiveStrength', axis=1)
+y_train = df_train['CompressiveStrength']
+y_valid = df_valid['CompressiveStrength']
+
+input_shape = [X_train.shape[1]]
+
+
+model = keras.Sequential([
+    layers.Dense(512, activation='relu', input_shape=input_shape),
+    layers.Dense(512, activation='relu'),
+    layers.Dense(512, activation='relu'),
+    layers.Dense(1),
+])
+model.compile(
+    optimizer='sgd',  # SGD is more sensitive to differences of scale
+    loss='mae',
+    metrics=['mae'],
+)
+history = model.fit(
+    X_train, y_train,
+    validation_data=(X_valid, y_valid),
+    batch_size=64,
+    epochs=100,
+    verbose=0,
+)
+
+history_df = pd.DataFrame(history.history)
+history_df.loc[0:, ['loss', 'val_loss']].plot()
+print(("Minimum Validation Loss: {:0.4f}").format(history_df['val_loss'].min()))
+
+
+model = keras.Sequential([
+    layers.BatchNormalization(input_shape=input_shape),
+    layers.Dense(512, activation='relu'),
+    layers.BatchNormalization(),
+    layers.Dense(512, activation='relu'),
+    layers.BatchNormalization(),
+    layers.Dense(512, activation='relu'),
+    layers.BatchNormalization(),
+    layers.Dense(1),
+])
+
+
+model.compile(
+    optimizer='sgd',
+    loss='mae',
+    metrics=['mae'],
+)
+EPOCHS = 100
+history = model.fit(
+    X_train, y_train,
+    validation_data=(X_valid, y_valid),
+    batch_size=64,
+    epochs=EPOCHS,
+    verbose=0,
+)
+
+history_df = pd.DataFrame(history.history)
+history_df.loc[0:, ['loss', 'val_loss']].plot()
+print(("Minimum Validation Loss: {:0.4f}").format(history_df['val_loss'].min()))
dl4.py
ADDED
@@ -0,0 +1,92 @@
+import pandas as pd
+
+from sklearn.model_selection import train_test_split
+from sklearn.preprocessing import StandardScaler, OneHotEncoder
+from sklearn.impute import SimpleImputer
+from sklearn.pipeline import make_pipeline
+from sklearn.compose import make_column_transformer
+
+hotel = pd.read_csv('../input/dl-course-data/hotel.csv')
+
+X = hotel.copy()
+y = X.pop('is_canceled')
+
+X['arrival_date_month'] = \
+    X['arrival_date_month'].map(
+        {'January': 1, 'February': 2, 'March': 3,
+         'April': 4, 'May': 5, 'June': 6, 'July': 7,
+         'August': 8, 'September': 9, 'October': 10,
+         'November': 11, 'December': 12}
+    )
+
+features_num = [
+    "lead_time", "arrival_date_week_number",
+    "arrival_date_day_of_month", "stays_in_weekend_nights",
+    "stays_in_week_nights", "adults", "children", "babies",
+    "is_repeated_guest", "previous_cancellations",
+    "previous_bookings_not_canceled", "required_car_parking_spaces",
+    "total_of_special_requests", "adr",
+]
+features_cat = [
+    "hotel", "arrival_date_month", "meal",
+    "market_segment", "distribution_channel",
+    "reserved_room_type", "deposit_type", "customer_type",
+]
+
+transformer_num = make_pipeline(
+    SimpleImputer(strategy="constant"),  # there are a few missing values
+    StandardScaler(),
+)
+transformer_cat = make_pipeline(
+    SimpleImputer(strategy="constant", fill_value="NA"),
+    OneHotEncoder(handle_unknown='ignore'),
+)
+
+preprocessor = make_column_transformer(
+    (transformer_num, features_num),
+    (transformer_cat, features_cat),
+)
+
+# stratify - make sure classes are evenly represented across splits
+X_train, X_valid, y_train, y_valid = \
+    train_test_split(X, y, stratify=y, train_size=0.75)
+
+X_train = preprocessor.fit_transform(X_train)
+X_valid = preprocessor.transform(X_valid)
+
+input_shape = [X_train.shape[1]]
+
+from tensorflow import keras
+from tensorflow.keras import layers
+
+model = keras.Sequential([
+    layers.BatchNormalization(input_shape=input_shape),
+    layers.Dense(256, activation='relu'),
+    layers.BatchNormalization(),
+    layers.Dropout(0.3),
+    layers.Dense(256, activation='relu'),
+    layers.BatchNormalization(),
+    layers.Dropout(0.3),
+    layers.Dense(1, activation='sigmoid')
+])
+model.compile(
+    optimizer='adam',
+    loss='binary_crossentropy',
+    metrics=['binary_accuracy'],
+)
+early_stopping = keras.callbacks.EarlyStopping(
+    patience=5,
+    min_delta=0.001,
+    restore_best_weights=True,
+)
+history = model.fit(
+    X_train, y_train,
+    validation_data=(X_valid, y_valid),
+    batch_size=512,
+    epochs=200,
+    callbacks=[early_stopping],
+)
+
+history_df = pd.DataFrame(history.history)
+history_df.loc[:, ['loss', 'val_loss']].plot(title="Cross-entropy")
+history_df.loc[:, ['binary_accuracy', 'val_binary_accuracy']].plot(title="Accuracy")
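Because the last layer is a sigmoid, model.predict returns cancellation probabilities in [0, 1]; hard labels need an explicit threshold. A short sketch reusing the variables above:

import numpy as np

probs = model.predict(X_valid)             # shape (n_samples, 1), values in [0, 1]
preds = (probs > 0.5).astype(int).ravel()  # 1 = predicted cancellation
accuracy = np.mean(preds == y_valid.to_numpy())
print(f"Validation accuracy at a 0.5 threshold: {accuracy:.3f}")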
random_forest_model.pkl
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7e2f05c9a67688563b18f755aa4bc75c3daa19bd91f92af80ba3cbde89ab710e
+size 37522105
test.py
ADDED
@@ -0,0 +1,77 @@
+import pefile
+import tkinter as tk
+from tkinter import filedialog
+
+def extract_pe_info(file_path):
+    try:
+        pe = pefile.PE(file_path)
+        info = {
+            'AddressOfEntryPoint': hex(pe.OPTIONAL_HEADER.AddressOfEntryPoint),
+            'MajorLinkerVersion': pe.OPTIONAL_HEADER.MajorLinkerVersion,
+            'MajorImageVersion': pe.OPTIONAL_HEADER.MajorImageVersion,
+            'MajorOperatingSystemVersion': pe.OPTIONAL_HEADER.MajorOperatingSystemVersion,
+            'DllCharacteristics': hex(pe.OPTIONAL_HEADER.DllCharacteristics),
+            'SizeOfStackReserve': pe.OPTIONAL_HEADER.SizeOfStackReserve,
+            'NumberOfSections': pe.FILE_HEADER.NumberOfSections,
+            'SizeOfImage': pe.OPTIONAL_HEADER.SizeOfImage,
+            'SizeOfHeaders': pe.OPTIONAL_HEADER.SizeOfHeaders,
+            'Subsystem': pe.OPTIONAL_HEADER.Subsystem,
+            'Machine': pe.FILE_HEADER.Machine,  # was mislabeled 'Magic'; FILE_HEADER.Machine is the machine type
+            'Characteristics': hex(pe.FILE_HEADER.Characteristics),
+            'TimeDateStamp': pe.FILE_HEADER.TimeDateStamp,
+            'ImageBase': hex(pe.OPTIONAL_HEADER.ImageBase),
+            'CheckSum': pe.OPTIONAL_HEADER.CheckSum,
+
+            'SizeOfCode': pe.OPTIONAL_HEADER.SizeOfCode,
+            'SizeOfInitializedData': pe.OPTIONAL_HEADER.SizeOfInitializedData,
+            'SizeOfUninitializedData': pe.OPTIONAL_HEADER.SizeOfUninitializedData,
+
+            'MajorSubsystemVersion': pe.OPTIONAL_HEADER.MajorSubsystemVersion,
+
+            'SizeOfHeapReserve': pe.OPTIONAL_HEADER.SizeOfHeapReserve,
+
+            'NumberOfRvaAndSizes': pe.OPTIONAL_HEADER.NumberOfRvaAndSizes,
+            'DataDirectory': pe.OPTIONAL_HEADER.DATA_DIRECTORY,
+        }
+        # Note: the original literal repeated the AddressOfEntryPoint, ImageBase,
+        # MajorImageVersion, SizeOfImage, SizeOfHeaders and Subsystem keys; in a
+        # Python dict literal the last occurrence silently wins, so the
+        # duplicates are dropped here.
+        return info
+    except Exception as e:
+        return str(e)
+
+def inspect_pe_attributes(file_path):
+    try:
+        pe = pefile.PE(file_path)
+        # Get the list of DATA_DIRECTORY entries
+        data_directory_entries = pe.OPTIONAL_HEADER.DATA_DIRECTORY
+
+        # Walk the list to find the resource directory entry
+        for entry in data_directory_entries:
+            if entry.name == "IMAGE_DIRECTORY_ENTRY_RESOURCE":
+                resource_size = entry.Size
+                return resource_size
+        return 0  # no resource directory found (the original returned None here)
+    except Exception as e:
+        return f"Erreur d'inspection du fichier {file_path}: {str(e)}"
+
+
+def upload_file():
+    file_path = filedialog.askopenfilename()
+    if file_path:
+        pe_info = extract_pe_info(file_path)
+        print(pe_info)
+
+# Build the graphical interface
+root = tk.Tk()
+root.title("PE File Info Extractor")
+
+upload_button = tk.Button(root, text="Upload PE File", command=upload_file)
+upload_button.pack(pady=20)
+
+root.mainloop()
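inspect_pe_attributes scans DATA_DIRECTORY comparing entry names; pefile also ships a name-to-index mapping that makes the lookup direct. A sketch using pefile.DIRECTORY_ENTRY (an assumption worth checking against the installed pefile version):

import pefile

def resource_size(file_path):
    # Size of the PE resource directory, or 0 if the entry is absent.
    pe = pefile.PE(file_path)
    idx = pefile.DIRECTORY_ENTRY["IMAGE_DIRECTORY_ENTRY_RESOURCE"]  # index 2
    directory = pe.OPTIONAL_HEADER.DATA_DIRECTORY
    return directory[idx].Size if idx < len(directory) else 0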