{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Finetuning efficientNet" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Lets try the model that is trending for image classification on HuggingFace: efficientnet_b2.ra_in1k" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "0a87f155dc5d480c8b68caf0c69f69cd", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading readme: 0%| | 0.00/5.16k [00:00 2].index)]" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "352630377c4f42adad3b161fd95e7545", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Casting to class labels: 0%| | 0/25725 [00:00" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Run data is saved locally in /home/james/CodingProjects/ArcPostDoc/ArtifactClassification/notebooks/wandb/run-20240214_115817-fyblqcba" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "Syncing run passionate-lovebird-214 to Weights & Biases (docs)
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ " View project at https://wandb.ai/jameswburton18/huggingface" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ " View run at https://wandb.ai/jameswburton18/huggingface/runs/fyblqcba" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "a8012d9b2c7b47c5aa2533983016d3a2", "version_major": 2, "version_minor": 0 }, "text/plain": [ " 0%| | 0/3750 [00:00 28\u001b[0m \u001b[43mtrainer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtrain\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/.virtualenvs/ArtifactClassification/lib/python3.10/site-packages/transformers/trainer.py:1539\u001b[0m, in \u001b[0;36mTrainer.train\u001b[0;34m(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)\u001b[0m\n\u001b[1;32m 1537\u001b[0m hf_hub_utils\u001b[38;5;241m.\u001b[39menable_progress_bars()\n\u001b[1;32m 1538\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 1539\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43minner_training_loop\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1540\u001b[0m \u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1541\u001b[0m \u001b[43m \u001b[49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresume_from_checkpoint\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1542\u001b[0m \u001b[43m \u001b[49m\u001b[43mtrial\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtrial\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1543\u001b[0m \u001b[43m \u001b[49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mignore_keys_for_eval\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1544\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/.virtualenvs/ArtifactClassification/lib/python3.10/site-packages/transformers/trainer.py:1881\u001b[0m, in \u001b[0;36mTrainer._inner_training_loop\u001b[0;34m(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)\u001b[0m\n\u001b[1;32m 1878\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1879\u001b[0m tr_loss \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m tr_loss_step\n\u001b[0;32m-> 1881\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcurrent_flos \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;28mfloat\u001b[39m(\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfloating_point_ops\u001b[49m\u001b[43m(\u001b[49m\u001b[43minputs\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[1;32m 1883\u001b[0m is_last_step_and_steps_less_than_grad_acc \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 1884\u001b[0m steps_in_epoch \u001b[38;5;241m<\u001b[39m\u001b[38;5;241m=\u001b[39m args\u001b[38;5;241m.\u001b[39mgradient_accumulation_steps \u001b[38;5;129;01mand\u001b[39;00m (step \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m) \u001b[38;5;241m==\u001b[39m steps_in_epoch\n\u001b[1;32m 1885\u001b[0m )\n\u001b[1;32m 1887\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[1;32m 1888\u001b[0m total_batched_samples \u001b[38;5;241m%\u001b[39m args\u001b[38;5;241m.\u001b[39mgradient_accumulation_steps \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[1;32m 1889\u001b[0m 
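  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`data_collator` and `compute_metrics` are referenced by the `Trainer` below but their definitions were lost; these are standard reconstructions using `DefaultDataCollator` and the `evaluate` accuracy metric."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import evaluate\n",
    "from transformers import DefaultDataCollator\n",
    "\n",
    "data_collator = DefaultDataCollator()\n",
    "accuracy = evaluate.load(\"accuracy\")\n",
    "\n",
    "\n",
    "def compute_metrics(eval_pred):\n",
    "    # Logits -> predicted class ids, then plain accuracy against the labels.\n",
    "    logits, labels = eval_pred\n",
    "    predictions = np.argmax(logits, axis=-1)\n",
    "    return accuracy.compute(predictions=predictions, references=labels)"
   ]
  },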
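  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "With `per_device_train_batch_size=8` and `gradient_accumulation_steps=4`, the effective training batch size is 8 × 4 = 32, while per-device memory stays at the batch-8 level (a per-device batch of 16 ran out of memory). Note that the run below was interrupted by hand partway through, hence the `KeyboardInterrupt` in its output."
   ]
  },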
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Syncing run passionate-lovebird-214 to Weights & Biases\n",
      "View run at https://wandb.ai/jameswburton18/huggingface/runs/fyblqcba\n"
     ]
    },
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "KeyboardInterrupt: "
     ]
    }
   ],
   "source": [
    "from transformers import Trainer, TrainingArguments\n",
    "\n",
    "training_args = TrainingArguments(\n",
    "    output_dir=\"cifar10_efficientnet\",\n",
    "    remove_unused_columns=False,\n",
    "    evaluation_strategy=\"epoch\",\n",
    "    save_strategy=\"epoch\",\n",
    "    learning_rate=5e-5,\n",
    "    per_device_train_batch_size=8,  # memory error with 16\n",
    "    gradient_accumulation_steps=4,\n",
    "    per_device_eval_batch_size=8,\n",
    "    num_train_epochs=3,\n",
    "    warmup_ratio=0.1,\n",
    "    logging_steps=10,\n",
    "    load_best_model_at_end=True,\n",
    "    metric_for_best_model=\"accuracy\",\n",
    "    push_to_hub=False,\n",
    ")\n",
    "\n",
    "trainer = Trainer(\n",
    "    model=model,\n",
    "    args=training_args,\n",
    "    data_collator=data_collator,\n",
    "    train_dataset=cifar10dataset[\"train\"],\n",
    "    eval_dataset=cifar10dataset[\"test\"],\n",
    "    tokenizer=image_processor,\n",
    "    compute_metrics=compute_metrics,\n",
    ")\n",
    "\n",
    "trainer.train()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Evaluation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "results = trainer.evaluate()\n",
    "print(results)\n",
    "\n",
    "# The original cell was cut off mid-call; predicting on the test split\n",
    "# matches the eval_dataset used above.\n",
    "test_results = trainer.predict(cifar10dataset[\"test\"])\n",
    "print(test_results.metrics)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "venv_bloom-classifier",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}