{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "collapsed_sections": [
        "bmXln03G-6jt",
        "NJN3_9QzBMFz",
        "NJUwS6uF_nPO",
        "aAlncS4tPKyR",
        "qpg8QGRLckvg",
        "pwDRSup3lhnX",
        "7hDyA__j0_cg",
        "DeD2dGjbMWQS",
        "MnzenpVK1RUw",
        "YLFxW6OTmkSz",
        "He3sYIMuYHdj",
        "GutjrXHHb0_B",
        "u9NimG14vOGS",
        "MSgsUE3a7F6V"
      ]
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    },
    "widgets": {
      "application/vnd.jupyter.widget-state+json": {
        "894f0647a28f4449a63e680841518b0d": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_707e977651d6492082bb6615ba6b7d6e",
              "IPY_MODEL_732150fa2e1a43c7aa61719cb083d6ac",
              "IPY_MODEL_86eae60d0e184e27a6d1f73bc7f37560"
            ],
            "layout": "IPY_MODEL_8ad5f7a38f9d4b4780a3a45a9842a56d"
          }
        },
        "707e977651d6492082bb6615ba6b7d6e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_fb6e2cca3ca84dafbf09599481314a2a",
            "placeholder": "​",
            "style": "IPY_MODEL_3188c60873aa4d2faa5be0ea04ea3e09",
            "value": "Epoch 5: 100%"
          }
        },
        "732150fa2e1a43c7aa61719cb083d6ac": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_befb029ec4814bf8b12ed8b7c222d8ab",
            "max": 48,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_f801d767b2d446229f30c418604c6d84",
            "value": 48
          }
        },
        "86eae60d0e184e27a6d1f73bc7f37560": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_b459c7018ee94121891d27687ba5ae59",
            "placeholder": "​",
            "style": "IPY_MODEL_d1dad85ea0ae46fab93b0cc0cbc86310",
            "value": " 48/48 [00:17&lt;00:00,  2.67it/s, v_num=1, train_loss=25.90]"
          }
        },
        "8ad5f7a38f9d4b4780a3a45a9842a56d": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": "inline-flex",
            "flex": null,
            "flex_flow": "row wrap",
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": "100%"
          }
        },
        "fb6e2cca3ca84dafbf09599481314a2a": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "3188c60873aa4d2faa5be0ea04ea3e09": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "befb029ec4814bf8b12ed8b7c222d8ab": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": "2",
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "f801d767b2d446229f30c418604c6d84": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "b459c7018ee94121891d27687ba5ae59": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "d1dad85ea0ae46fab93b0cc0cbc86310": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "dca4feb8999943458c52402e4821fd9b": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_962de14c45d440f981cab7f98bb999d4",
              "IPY_MODEL_e1670826b6984372b3e1f6d38dc775ce",
              "IPY_MODEL_3cd0ed60902944d39d3f0c1acd41cdc3"
            ],
            "layout": "IPY_MODEL_a73eb0974895406f923d3db27483084f"
          }
        },
        "962de14c45d440f981cab7f98bb999d4": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_c7645ed1ba1a4401be3523e96b611adb",
            "placeholder": "​",
            "style": "IPY_MODEL_d5d2baac77dd489c911e851aea84b2e8",
            "value": "modules.json: 100%"
          }
        },
        "e1670826b6984372b3e1f6d38dc775ce": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_de9347d60d8b43888b3603bbfbceb2af",
            "max": 229,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_a627519796034c88a763f9cd2ec0808e",
            "value": 229
          }
        },
        "3cd0ed60902944d39d3f0c1acd41cdc3": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_7a868d4c9e3f4a70961c2d056d6a5653",
            "placeholder": "​",
            "style": "IPY_MODEL_b2b43b9c16664528a1c55e66805eff76",
            "value": " 229/229 [00:00&lt;00:00, 4.52kB/s]"
          }
        },
        "a73eb0974895406f923d3db27483084f": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "c7645ed1ba1a4401be3523e96b611adb": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "d5d2baac77dd489c911e851aea84b2e8": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "de9347d60d8b43888b3603bbfbceb2af": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "a627519796034c88a763f9cd2ec0808e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "7a868d4c9e3f4a70961c2d056d6a5653": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "b2b43b9c16664528a1c55e66805eff76": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "d02d64dc3c9a499491861bef90086d82": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_7aad2011378e434eac5d46c19a117631",
              "IPY_MODEL_208acf230279450ca859204736946a28",
              "IPY_MODEL_641efe28133240ccbcff70a3d4424249"
            ],
            "layout": "IPY_MODEL_95152a8da2dd4333afeb475aec2b3def"
          }
        },
        "7aad2011378e434eac5d46c19a117631": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_0388f73993794aa3aba737c2ff7a06e7",
            "placeholder": "​",
            "style": "IPY_MODEL_89414b1d7b06439c8bdb33948be5d11e",
            "value": "config_sentence_transformers.json: 100%"
          }
        },
        "208acf230279450ca859204736946a28": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_d1f4213d8cbe4416a26520c67b8898ef",
            "max": 122,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_d3d80ca0e7dd4ec396e96892ec86e17c",
            "value": 122
          }
        },
        "641efe28133240ccbcff70a3d4424249": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_8843fcd259744d94979cc460e24f5ba3",
            "placeholder": "​",
            "style": "IPY_MODEL_0de221a4d2d644c885c0d3f82f8b924c",
            "value": " 122/122 [00:00&lt;00:00, 1.57kB/s]"
          }
        },
        "95152a8da2dd4333afeb475aec2b3def": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "0388f73993794aa3aba737c2ff7a06e7": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "89414b1d7b06439c8bdb33948be5d11e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "d1f4213d8cbe4416a26520c67b8898ef": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "d3d80ca0e7dd4ec396e96892ec86e17c": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "8843fcd259744d94979cc460e24f5ba3": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "0de221a4d2d644c885c0d3f82f8b924c": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "9ed3dd803a884c52aff984be748b0fa2": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_d8d8af05c11e4ecfa4c75f9eea5f581a",
              "IPY_MODEL_134b61665b5f42ebaa18710e78e4b37a",
              "IPY_MODEL_8257e6f8f14c4754a938dd2ac67002aa"
            ],
            "layout": "IPY_MODEL_c16eb49089ee4cbfb42b58b09c529608"
          }
        },
        "d8d8af05c11e4ecfa4c75f9eea5f581a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_be6a7355b7be437586bb693fecae802d",
            "placeholder": "​",
            "style": "IPY_MODEL_4969a761d9f34a62b5502683ad93a723",
            "value": "README.md: 100%"
          }
        },
        "134b61665b5f42ebaa18710e78e4b37a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_f39aa003b8b243efa9992302d9c39ecb",
            "max": 4126,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_6556302f910349f7aea105bcc4a4e516",
            "value": 4126
          }
        },
        "8257e6f8f14c4754a938dd2ac67002aa": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_819d6c6459fc4a71bf37cce6daf4cd04",
            "placeholder": "​",
            "style": "IPY_MODEL_6a5b78e5f5834238b48b3b893d1a797c",
            "value": " 4.13k/4.13k [00:00&lt;00:00, 79.0kB/s]"
          }
        },
        "c16eb49089ee4cbfb42b58b09c529608": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "be6a7355b7be437586bb693fecae802d": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "4969a761d9f34a62b5502683ad93a723": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "f39aa003b8b243efa9992302d9c39ecb": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "6556302f910349f7aea105bcc4a4e516": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "819d6c6459fc4a71bf37cce6daf4cd04": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "6a5b78e5f5834238b48b3b893d1a797c": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "d2acc67c459840c5872a0aed088a673a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_b22bd6adc4944ac2a8223d1fc54e278e",
              "IPY_MODEL_9eed2253e2794b96b078174c26f1599e",
              "IPY_MODEL_9c12caeb617e4a079bb4960698a9addf"
            ],
            "layout": "IPY_MODEL_f96eeb6fb26243a0988eed5281101c36"
          }
        },
        "b22bd6adc4944ac2a8223d1fc54e278e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_543c6458885e4c92911ac85a6bceff99",
            "placeholder": "​",
            "style": "IPY_MODEL_be795953cbe942d891d3e2e1a857b1c7",
            "value": "sentence_bert_config.json: 100%"
          }
        },
        "9eed2253e2794b96b078174c26f1599e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_cffa853377644f2f8a8bba31795146a0",
            "max": 53,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_08a68525c0174d77b8fa1f7a16b37f3d",
            "value": 53
          }
        },
        "9c12caeb617e4a079bb4960698a9addf": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_e5f8df883014424997370c858ca75119",
            "placeholder": "​",
            "style": "IPY_MODEL_980b8f08ae1f441ba4fcd9fec7bba172",
            "value": " 53.0/53.0 [00:00&lt;00:00, 1.17kB/s]"
          }
        },
        "f96eeb6fb26243a0988eed5281101c36": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "543c6458885e4c92911ac85a6bceff99": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "be795953cbe942d891d3e2e1a857b1c7": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "cffa853377644f2f8a8bba31795146a0": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "08a68525c0174d77b8fa1f7a16b37f3d": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "e5f8df883014424997370c858ca75119": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "980b8f08ae1f441ba4fcd9fec7bba172": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "fb399b6ccf14488e8f3a5ee25ba1f519": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_9689a4b5fc3443c988355d51fb8b31dd",
              "IPY_MODEL_07c7545951bd45838b37656a7d55c4f5",
              "IPY_MODEL_375f551994f141a197d717059513a15a"
            ],
            "layout": "IPY_MODEL_f8e89fd727a747d1a205f0ea3452a94a"
          }
        },
        "9689a4b5fc3443c988355d51fb8b31dd": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_b0d6e4b6564c46348efe5ee5e24a5a36",
            "placeholder": "​",
            "style": "IPY_MODEL_ab1c746c9a414d16a11db2ecad0797ab",
            "value": "config.json: 100%"
          }
        },
        "07c7545951bd45838b37656a7d55c4f5": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_7297eacf7ca34b3bba0029375b8c811a",
            "max": 723,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_0075b2bdafa04862b6a871356351dfa1",
            "value": 723
          }
        },
        "375f551994f141a197d717059513a15a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_64db601a4cca46a882e32f771c3dba3d",
            "placeholder": "​",
            "style": "IPY_MODEL_80946e3d99314550b777b8a62ffe0521",
            "value": " 723/723 [00:00&lt;00:00, 16.4kB/s]"
          }
        },
        "f8e89fd727a747d1a205f0ea3452a94a": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "b0d6e4b6564c46348efe5ee5e24a5a36": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "ab1c746c9a414d16a11db2ecad0797ab": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "7297eacf7ca34b3bba0029375b8c811a": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "0075b2bdafa04862b6a871356351dfa1": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "64db601a4cca46a882e32f771c3dba3d": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "80946e3d99314550b777b8a62ffe0521": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "f810aade9a2943aaa5949f30774ab26f": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_ea5a3b55a5bc450aa0f92c88625778f8",
              "IPY_MODEL_ba21fc338bfc4269bef02e50c0711b7c",
              "IPY_MODEL_5a7c1b813c5b4ceea534f74765c5e605"
            ],
            "layout": "IPY_MODEL_894779b9a1384b668593ae969055c32c"
          }
        },
        "ea5a3b55a5bc450aa0f92c88625778f8": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_75d1b0ba8e7048cfb30e507ec3f24ff8",
            "placeholder": "​",
            "style": "IPY_MODEL_59dda900f0894c54b6ce7302e761ad43",
            "value": "model.safetensors: 100%"
          }
        },
        "ba21fc338bfc4269bef02e50c0711b7c": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_bd87f489e77d44d9b1e066f16c0ca9ed",
            "max": 1112201288,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_4e0bd61e94594f5db415e7d5315de7c0",
            "value": 1112201288
          }
        },
        "5a7c1b813c5b4ceea534f74765c5e605": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_c239ef650cb54478825730509e2a1bd2",
            "placeholder": "​",
            "style": "IPY_MODEL_ee1503ea36ab41d2a48155fbe442d909",
            "value": " 1.11G/1.11G [00:14&lt;00:00, 95.1MB/s]"
          }
        },
        "894779b9a1384b668593ae969055c32c": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "75d1b0ba8e7048cfb30e507ec3f24ff8": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "59dda900f0894c54b6ce7302e761ad43": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "bd87f489e77d44d9b1e066f16c0ca9ed": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "4e0bd61e94594f5db415e7d5315de7c0": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "c239ef650cb54478825730509e2a1bd2": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "ee1503ea36ab41d2a48155fbe442d909": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "8798e6b2ae124152bb6cca66b14c0d3a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_ba5e2e03809a428796f8905392eae113",
              "IPY_MODEL_27067ff09cbe4ad091ffdd18ea30bc17",
              "IPY_MODEL_d722357cd295474290b5dc0ebd704f4c"
            ],
            "layout": "IPY_MODEL_d89ed637839f4c9abd4268f3d71f3eab"
          }
        },
        "ba5e2e03809a428796f8905392eae113": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_63c52b32dbae41b4a82020cbf030ddb4",
            "placeholder": "​",
            "style": "IPY_MODEL_3adaba744a5740318b2cf4188429acc0",
            "value": "tokenizer_config.json: 100%"
          }
        },
        "27067ff09cbe4ad091ffdd18ea30bc17": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_cc0fc21f8f2d43b4859f1886a33cf0d9",
            "max": 402,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_d696138c3379401fae995f49d0d4cd48",
            "value": 402
          }
        },
        "d722357cd295474290b5dc0ebd704f4c": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_e291c590d5ad4f62b29f65a451fdea09",
            "placeholder": "​",
            "style": "IPY_MODEL_f42a6bf637f846659b0f7d7d198e6eb4",
            "value": " 402/402 [00:00&lt;00:00, 23.5kB/s]"
          }
        },
        "d89ed637839f4c9abd4268f3d71f3eab": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "63c52b32dbae41b4a82020cbf030ddb4": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "3adaba744a5740318b2cf4188429acc0": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "cc0fc21f8f2d43b4859f1886a33cf0d9": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "d696138c3379401fae995f49d0d4cd48": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "e291c590d5ad4f62b29f65a451fdea09": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "f42a6bf637f846659b0f7d7d198e6eb4": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "4a74fa41f5d8471e8db6f79ee06d88a1": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_df3a741ce3334174a1ba1c185bcf7e4d",
              "IPY_MODEL_27e51c8b4ed54557987dc6362be11609",
              "IPY_MODEL_af075877b5864bd3b6f98b13695f1e0d"
            ],
            "layout": "IPY_MODEL_b8935da2360a4a8f8be0f1cf12049704"
          }
        },
        "df3a741ce3334174a1ba1c185bcf7e4d": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_0007b3003cf94edfbe879278e68ad521",
            "placeholder": "​",
            "style": "IPY_MODEL_91d456c10ac14ddda54da8f6b0d6c45a",
            "value": "sentencepiece.bpe.model: 100%"
          }
        },
        "27e51c8b4ed54557987dc6362be11609": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_304f170991ee4e538908b36bc5ee5c71",
            "max": 5069051,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_e1cebdb893b64ef0ac977255e216143a",
            "value": 5069051
          }
        },
        "af075877b5864bd3b6f98b13695f1e0d": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_b7327644f1a14c9f9b620747902501f1",
            "placeholder": "​",
            "style": "IPY_MODEL_ef7f978170c746f09aaa8f1450c02548",
            "value": " 5.07M/5.07M [00:00&lt;00:00, 78.9MB/s]"
          }
        },
        "b8935da2360a4a8f8be0f1cf12049704": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "0007b3003cf94edfbe879278e68ad521": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "91d456c10ac14ddda54da8f6b0d6c45a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "304f170991ee4e538908b36bc5ee5c71": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "e1cebdb893b64ef0ac977255e216143a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "b7327644f1a14c9f9b620747902501f1": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "ef7f978170c746f09aaa8f1450c02548": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "757581a0b5ca4ab5af853223f95e21e4": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_ace6230a55254740919ad2ee94309bff",
              "IPY_MODEL_569e73779e594bc0acb72bdcec240c26",
              "IPY_MODEL_506eec7a17cd4657a36d3970c81689b0"
            ],
            "layout": "IPY_MODEL_d0cb73af415c49f8887fe530cb58e218"
          }
        },
        "ace6230a55254740919ad2ee94309bff": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_b285870fcfd04f13a064b4f41bcdc15c",
            "placeholder": "​",
            "style": "IPY_MODEL_3b63a5eb40bc46dbb59ddf0dab56f961",
            "value": "tokenizer.json: 100%"
          }
        },
        "569e73779e594bc0acb72bdcec240c26": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_39abfc34a8de449ab918374cd617983d",
            "max": 9081518,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_6dbd7ac42eae437092baca2fcc660b50",
            "value": 9081518
          }
        },
        "506eec7a17cd4657a36d3970c81689b0": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_31434f024801490aa52a110bb0814510",
            "placeholder": "​",
            "style": "IPY_MODEL_61f56526d84e409d879cfd9935eb5084",
            "value": " 9.08M/9.08M [00:00&lt;00:00, 23.0MB/s]"
          }
        },
        "d0cb73af415c49f8887fe530cb58e218": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "b285870fcfd04f13a064b4f41bcdc15c": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "3b63a5eb40bc46dbb59ddf0dab56f961": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "39abfc34a8de449ab918374cd617983d": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "6dbd7ac42eae437092baca2fcc660b50": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "31434f024801490aa52a110bb0814510": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "61f56526d84e409d879cfd9935eb5084": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "ffdf2ed2b8d2412997a9845a85d1f4ab": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_5aa4451625f04032a62b35829b5cbfa3",
              "IPY_MODEL_e8202c4e66814d07bf4e7e980e930a90",
              "IPY_MODEL_a54eef1414914da0981e3037d19e79bd"
            ],
            "layout": "IPY_MODEL_8dcd120dfe98453b9f4e9cb145696940"
          }
        },
        "5aa4451625f04032a62b35829b5cbfa3": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_1741b6afdc08426ea2f7058457413999",
            "placeholder": "​",
            "style": "IPY_MODEL_c87cf21dbf074c699c1c57aa604940e6",
            "value": "special_tokens_map.json: 100%"
          }
        },
        "e8202c4e66814d07bf4e7e980e930a90": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_3dec68173930485d82b3a0d53deda040",
            "max": 239,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_320ea31c60b34983bf6b9c1a6d258eeb",
            "value": 239
          }
        },
        "a54eef1414914da0981e3037d19e79bd": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_1d7d3a59a8cc4e03825aebbd9fa8bdb6",
            "placeholder": "​",
            "style": "IPY_MODEL_07215f1e31c44c8893b096808be4d96e",
            "value": " 239/239 [00:00&lt;00:00, 22.5kB/s]"
          }
        },
        "8dcd120dfe98453b9f4e9cb145696940": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "1741b6afdc08426ea2f7058457413999": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "c87cf21dbf074c699c1c57aa604940e6": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "3dec68173930485d82b3a0d53deda040": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "320ea31c60b34983bf6b9c1a6d258eeb": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "1d7d3a59a8cc4e03825aebbd9fa8bdb6": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "07215f1e31c44c8893b096808be4d96e": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "438a2b975062455fb5fc06eb3cdaf625": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HBoxModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HBoxModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HBoxView",
            "box_style": "",
            "children": [
              "IPY_MODEL_0445438e0b9c4ac680f5e82183708492",
              "IPY_MODEL_5d0f54bb9df64b8786538424532d6397",
              "IPY_MODEL_a325f3831f664a0c96061d16e1d9c358"
            ],
            "layout": "IPY_MODEL_aac58743d8d64b8591abe9d421a43565"
          }
        },
        "0445438e0b9c4ac680f5e82183708492": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_aa49127f1f4d4b168a5d5b69dc7fed0d",
            "placeholder": "​",
            "style": "IPY_MODEL_fe79e4d23eb64d82bd818ac4c369303a",
            "value": "1_Pooling%2Fconfig.json: 100%"
          }
        },
        "5d0f54bb9df64b8786538424532d6397": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "FloatProgressModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "FloatProgressModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "ProgressView",
            "bar_style": "success",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_3d60222523434c1fbc1647b104d2af20",
            "max": 190,
            "min": 0,
            "orientation": "horizontal",
            "style": "IPY_MODEL_44dba6a745254c3ab17a76ba184b4ecd",
            "value": 190
          }
        },
        "a325f3831f664a0c96061d16e1d9c358": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "HTMLModel",
          "model_module_version": "1.5.0",
          "state": {
            "_dom_classes": [],
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "HTMLModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/controls",
            "_view_module_version": "1.5.0",
            "_view_name": "HTMLView",
            "description": "",
            "description_tooltip": null,
            "layout": "IPY_MODEL_d530476ab4934024a5753e5a520d7772",
            "placeholder": "​",
            "style": "IPY_MODEL_6bc1fefb830a4636b06a0f692bf7d71b",
            "value": " 190/190 [00:00&lt;00:00, 10.9kB/s]"
          }
        },
        "aac58743d8d64b8591abe9d421a43565": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "aa49127f1f4d4b168a5d5b69dc7fed0d": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "fe79e4d23eb64d82bd818ac4c369303a": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        },
        "3d60222523434c1fbc1647b104d2af20": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "44dba6a745254c3ab17a76ba184b4ecd": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "ProgressStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "ProgressStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "bar_color": null,
            "description_width": ""
          }
        },
        "d530476ab4934024a5753e5a520d7772": {
          "model_module": "@jupyter-widgets/base",
          "model_name": "LayoutModel",
          "model_module_version": "1.2.0",
          "state": {
            "_model_module": "@jupyter-widgets/base",
            "_model_module_version": "1.2.0",
            "_model_name": "LayoutModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "LayoutView",
            "align_content": null,
            "align_items": null,
            "align_self": null,
            "border": null,
            "bottom": null,
            "display": null,
            "flex": null,
            "flex_flow": null,
            "grid_area": null,
            "grid_auto_columns": null,
            "grid_auto_flow": null,
            "grid_auto_rows": null,
            "grid_column": null,
            "grid_gap": null,
            "grid_row": null,
            "grid_template_areas": null,
            "grid_template_columns": null,
            "grid_template_rows": null,
            "height": null,
            "justify_content": null,
            "justify_items": null,
            "left": null,
            "margin": null,
            "max_height": null,
            "max_width": null,
            "min_height": null,
            "min_width": null,
            "object_fit": null,
            "object_position": null,
            "order": null,
            "overflow": null,
            "overflow_x": null,
            "overflow_y": null,
            "padding": null,
            "right": null,
            "top": null,
            "visibility": null,
            "width": null
          }
        },
        "6bc1fefb830a4636b06a0f692bf7d71b": {
          "model_module": "@jupyter-widgets/controls",
          "model_name": "DescriptionStyleModel",
          "model_module_version": "1.5.0",
          "state": {
            "_model_module": "@jupyter-widgets/controls",
            "_model_module_version": "1.5.0",
            "_model_name": "DescriptionStyleModel",
            "_view_count": null,
            "_view_module": "@jupyter-widgets/base",
            "_view_module_version": "1.2.0",
            "_view_name": "StyleView",
            "description_width": ""
          }
        }
      }
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "# ***REGRESSION***"
      ],
      "metadata": {
        "id": "bmXln03G-6jt"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "!pip install catboost\n",
        "!pip install optuna"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "DjZTGudd_ZNL",
        "outputId": "27f69bb2-f6a3-467d-a88a-94f2ad9533ce"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Collecting catboost\n",
            "  Downloading catboost-1.2.7-cp311-cp311-manylinux2014_x86_64.whl.metadata (1.2 kB)\n",
            "Requirement already satisfied: graphviz in /usr/local/lib/python3.11/dist-packages (from catboost) (0.20.3)\n",
            "Requirement already satisfied: matplotlib in /usr/local/lib/python3.11/dist-packages (from catboost) (3.10.0)\n",
            "Requirement already satisfied: numpy<2.0,>=1.16.0 in /usr/local/lib/python3.11/dist-packages (from catboost) (1.26.4)\n",
            "Requirement already satisfied: pandas>=0.24 in /usr/local/lib/python3.11/dist-packages (from catboost) (2.2.2)\n",
            "Requirement already satisfied: scipy in /usr/local/lib/python3.11/dist-packages (from catboost) (1.13.1)\n",
            "Requirement already satisfied: plotly in /usr/local/lib/python3.11/dist-packages (from catboost) (5.24.1)\n",
            "Requirement already satisfied: six in /usr/local/lib/python3.11/dist-packages (from catboost) (1.17.0)\n",
            "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas>=0.24->catboost) (2.8.2)\n",
            "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas>=0.24->catboost) (2025.1)\n",
            "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas>=0.24->catboost) (2025.1)\n",
            "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib->catboost) (1.3.1)\n",
            "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.11/dist-packages (from matplotlib->catboost) (0.12.1)\n",
            "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.11/dist-packages (from matplotlib->catboost) (4.56.0)\n",
            "Requirement already satisfied: kiwisolver>=1.3.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib->catboost) (1.4.8)\n",
            "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.11/dist-packages (from matplotlib->catboost) (24.2)\n",
            "Requirement already satisfied: pillow>=8 in /usr/local/lib/python3.11/dist-packages (from matplotlib->catboost) (11.1.0)\n",
            "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib->catboost) (3.2.1)\n",
            "Requirement already satisfied: tenacity>=6.2.0 in /usr/local/lib/python3.11/dist-packages (from plotly->catboost) (9.0.0)\n",
            "Downloading catboost-1.2.7-cp311-cp311-manylinux2014_x86_64.whl (98.7 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m98.7/98.7 MB\u001b[0m \u001b[31m8.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hInstalling collected packages: catboost\n",
            "Successfully installed catboost-1.2.7\n",
            "Collecting optuna\n",
            "  Downloading optuna-4.2.1-py3-none-any.whl.metadata (17 kB)\n",
            "Collecting alembic>=1.5.0 (from optuna)\n",
            "  Downloading alembic-1.14.1-py3-none-any.whl.metadata (7.4 kB)\n",
            "Collecting colorlog (from optuna)\n",
            "  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)\n",
            "Requirement already satisfied: numpy in /usr/local/lib/python3.11/dist-packages (from optuna) (1.26.4)\n",
            "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.11/dist-packages (from optuna) (24.2)\n",
            "Requirement already satisfied: sqlalchemy>=1.4.2 in /usr/local/lib/python3.11/dist-packages (from optuna) (2.0.38)\n",
            "Requirement already satisfied: tqdm in /usr/local/lib/python3.11/dist-packages (from optuna) (4.67.1)\n",
            "Requirement already satisfied: PyYAML in /usr/local/lib/python3.11/dist-packages (from optuna) (6.0.2)\n",
            "Collecting Mako (from alembic>=1.5.0->optuna)\n",
            "  Downloading Mako-1.3.9-py3-none-any.whl.metadata (2.9 kB)\n",
            "Requirement already satisfied: typing-extensions>=4 in /usr/local/lib/python3.11/dist-packages (from alembic>=1.5.0->optuna) (4.12.2)\n",
            "Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.11/dist-packages (from sqlalchemy>=1.4.2->optuna) (3.1.1)\n",
            "Requirement already satisfied: MarkupSafe>=0.9.2 in /usr/local/lib/python3.11/dist-packages (from Mako->alembic>=1.5.0->optuna) (3.0.2)\n",
            "Downloading optuna-4.2.1-py3-none-any.whl (383 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m383.6/383.6 kB\u001b[0m \u001b[31m19.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading alembic-1.14.1-py3-none-any.whl (233 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m233.6/233.6 kB\u001b[0m \u001b[31m19.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)\n",
            "Downloading Mako-1.3.9-py3-none-any.whl (78 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m78.5/78.5 kB\u001b[0m \u001b[31m7.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hInstalling collected packages: Mako, colorlog, alembic, optuna\n",
            "Successfully installed Mako-1.3.9 alembic-1.14.1 colorlog-6.9.0 optuna-4.2.1\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.impute import SimpleImputer\n",
        "import numpy as np\n",
        "import pandas as pd\n",
        "from catboost import CatBoostRegressor, Pool\n",
        "from sklearn.metrics import mean_squared_error\n",
        "import optuna\n",
        "from sklearn.model_selection import train_test_split"
      ],
      "metadata": {
        "id": "Mm3YgMLcxrTF"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "train = pd.read_csv('/content/train (6).csv', index_col='Id')\n",
        "train.head(3)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 224
        },
        "id": "pMBWe5Bs-6RU",
        "outputId": "041e66c6-2e4e-4c9d-c4a6-ee0f9b2e84d0"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "    MSSubClass MSZoning  LotFrontage  LotArea Street Alley LotShape  \\\n",
              "Id                                                                    \n",
              "1           60       RL         65.0     8450   Pave   NaN      Reg   \n",
              "2           20       RL         80.0     9600   Pave   NaN      Reg   \n",
              "3           60       RL         68.0    11250   Pave   NaN      IR1   \n",
              "\n",
              "   LandContour Utilities LotConfig  ... PoolArea PoolQC Fence MiscFeature  \\\n",
              "Id                                  ...                                     \n",
              "1          Lvl    AllPub    Inside  ...        0    NaN   NaN         NaN   \n",
              "2          Lvl    AllPub       FR2  ...        0    NaN   NaN         NaN   \n",
              "3          Lvl    AllPub    Inside  ...        0    NaN   NaN         NaN   \n",
              "\n",
              "   MiscVal MoSold  YrSold  SaleType  SaleCondition  SalePrice  \n",
              "Id                                                             \n",
              "1        0      2    2008        WD         Normal     208500  \n",
              "2        0      5    2007        WD         Normal     181500  \n",
              "3        0      9    2008        WD         Normal     223500  \n",
              "\n",
              "[3 rows x 80 columns]"
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-7123518b-ceef-4856-9b52-ea365239eeb2\" class=\"colab-df-container\">\n",
              "    <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>MSSubClass</th>\n",
              "      <th>MSZoning</th>\n",
              "      <th>LotFrontage</th>\n",
              "      <th>LotArea</th>\n",
              "      <th>Street</th>\n",
              "      <th>Alley</th>\n",
              "      <th>LotShape</th>\n",
              "      <th>LandContour</th>\n",
              "      <th>Utilities</th>\n",
              "      <th>LotConfig</th>\n",
              "      <th>...</th>\n",
              "      <th>PoolArea</th>\n",
              "      <th>PoolQC</th>\n",
              "      <th>Fence</th>\n",
              "      <th>MiscFeature</th>\n",
              "      <th>MiscVal</th>\n",
              "      <th>MoSold</th>\n",
              "      <th>YrSold</th>\n",
              "      <th>SaleType</th>\n",
              "      <th>SaleCondition</th>\n",
              "      <th>SalePrice</th>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>Id</th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>60</td>\n",
              "      <td>RL</td>\n",
              "      <td>65.0</td>\n",
              "      <td>8450</td>\n",
              "      <td>Pave</td>\n",
              "      <td>NaN</td>\n",
              "      <td>Reg</td>\n",
              "      <td>Lvl</td>\n",
              "      <td>AllPub</td>\n",
              "      <td>Inside</td>\n",
              "      <td>...</td>\n",
              "      <td>0</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>0</td>\n",
              "      <td>2</td>\n",
              "      <td>2008</td>\n",
              "      <td>WD</td>\n",
              "      <td>Normal</td>\n",
              "      <td>208500</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>20</td>\n",
              "      <td>RL</td>\n",
              "      <td>80.0</td>\n",
              "      <td>9600</td>\n",
              "      <td>Pave</td>\n",
              "      <td>NaN</td>\n",
              "      <td>Reg</td>\n",
              "      <td>Lvl</td>\n",
              "      <td>AllPub</td>\n",
              "      <td>FR2</td>\n",
              "      <td>...</td>\n",
              "      <td>0</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>0</td>\n",
              "      <td>5</td>\n",
              "      <td>2007</td>\n",
              "      <td>WD</td>\n",
              "      <td>Normal</td>\n",
              "      <td>181500</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>60</td>\n",
              "      <td>RL</td>\n",
              "      <td>68.0</td>\n",
              "      <td>11250</td>\n",
              "      <td>Pave</td>\n",
              "      <td>NaN</td>\n",
              "      <td>IR1</td>\n",
              "      <td>Lvl</td>\n",
              "      <td>AllPub</td>\n",
              "      <td>Inside</td>\n",
              "      <td>...</td>\n",
              "      <td>0</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>0</td>\n",
              "      <td>9</td>\n",
              "      <td>2008</td>\n",
              "      <td>WD</td>\n",
              "      <td>Normal</td>\n",
              "      <td>223500</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "<p>3 rows × 80 columns</p>\n",
              "</div>\n",
              "    <div class=\"colab-df-buttons\">\n",
              "\n",
              "  <div class=\"colab-df-container\">\n",
              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-7123518b-ceef-4856-9b52-ea365239eeb2')\"\n",
              "            title=\"Convert this dataframe to an interactive table.\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
              "  </svg>\n",
              "    </button>\n",
              "\n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    .colab-df-buttons div {\n",
              "      margin-bottom: 4px;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "    <script>\n",
              "      const buttonEl =\n",
              "        document.querySelector('#df-7123518b-ceef-4856-9b52-ea365239eeb2 button.colab-df-convert');\n",
              "      buttonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "      async function convertToInteractive(key) {\n",
              "        const element = document.querySelector('#df-7123518b-ceef-4856-9b52-ea365239eeb2');\n",
              "        const dataTable =\n",
              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                    [key], {});\n",
              "        if (!dataTable) return;\n",
              "\n",
              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "          + ' to learn more about interactive tables.';\n",
              "        element.innerHTML = '';\n",
              "        dataTable['output_type'] = 'display_data';\n",
              "        await google.colab.output.renderOutput(dataTable, element);\n",
              "        const docLink = document.createElement('div');\n",
              "        docLink.innerHTML = docLinkHtml;\n",
              "        element.appendChild(docLink);\n",
              "      }\n",
              "    </script>\n",
              "  </div>\n",
              "\n",
              "\n",
              "<div id=\"df-ec08b6c1-6571-41d7-a6f2-bd5f0f673460\">\n",
              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-ec08b6c1-6571-41d7-a6f2-bd5f0f673460')\"\n",
              "            title=\"Suggest charts\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "     width=\"24px\">\n",
              "    <g>\n",
              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
              "    </g>\n",
              "</svg>\n",
              "  </button>\n",
              "\n",
              "<style>\n",
              "  .colab-df-quickchart {\n",
              "      --bg-color: #E8F0FE;\n",
              "      --fill-color: #1967D2;\n",
              "      --hover-bg-color: #E2EBFA;\n",
              "      --hover-fill-color: #174EA6;\n",
              "      --disabled-fill-color: #AAA;\n",
              "      --disabled-bg-color: #DDD;\n",
              "  }\n",
              "\n",
              "  [theme=dark] .colab-df-quickchart {\n",
              "      --bg-color: #3B4455;\n",
              "      --fill-color: #D2E3FC;\n",
              "      --hover-bg-color: #434B5C;\n",
              "      --hover-fill-color: #FFFFFF;\n",
              "      --disabled-bg-color: #3B4455;\n",
              "      --disabled-fill-color: #666;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart {\n",
              "    background-color: var(--bg-color);\n",
              "    border: none;\n",
              "    border-radius: 50%;\n",
              "    cursor: pointer;\n",
              "    display: none;\n",
              "    fill: var(--fill-color);\n",
              "    height: 32px;\n",
              "    padding: 0;\n",
              "    width: 32px;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart:hover {\n",
              "    background-color: var(--hover-bg-color);\n",
              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "    fill: var(--button-hover-fill-color);\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart-complete:disabled,\n",
              "  .colab-df-quickchart-complete:disabled:hover {\n",
              "    background-color: var(--disabled-bg-color);\n",
              "    fill: var(--disabled-fill-color);\n",
              "    box-shadow: none;\n",
              "  }\n",
              "\n",
              "  .colab-df-spinner {\n",
              "    border: 2px solid var(--fill-color);\n",
              "    border-color: transparent;\n",
              "    border-bottom-color: var(--fill-color);\n",
              "    animation:\n",
              "      spin 1s steps(1) infinite;\n",
              "  }\n",
              "\n",
              "  @keyframes spin {\n",
              "    0% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "      border-left-color: var(--fill-color);\n",
              "    }\n",
              "    20% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    30% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    40% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    60% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    80% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "    90% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "  }\n",
              "</style>\n",
              "\n",
              "  <script>\n",
              "    async function quickchart(key) {\n",
              "      const quickchartButtonEl =\n",
              "        document.querySelector('#' + key + ' button');\n",
              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
              "      try {\n",
              "        const charts = await google.colab.kernel.invokeFunction(\n",
              "            'suggestCharts', [key], {});\n",
              "      } catch (error) {\n",
              "        console.error('Error during call to suggestCharts:', error);\n",
              "      }\n",
              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
              "    }\n",
              "    (() => {\n",
              "      let quickchartButtonEl =\n",
              "        document.querySelector('#df-ec08b6c1-6571-41d7-a6f2-bd5f0f673460 button');\n",
              "      quickchartButtonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "    })();\n",
              "  </script>\n",
              "</div>\n",
              "    </div>\n",
              "  </div>\n"
            ],
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "dataframe",
              "variable_name": "train"
            }
          },
          "metadata": {},
          "execution_count": 25
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "columns = list(train.columns)\n",
        "categorical_features = []\n",
        "for i in columns:\n",
        "    if str(type(train[i].iloc[0])) == \"<class 'str'>\":\n",
        "        categorical_features.append(i)"
      ],
      "metadata": {
        "id": "AmXBmYG2DK_N"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "missing_values_table = train.isna().sum().reset_index()\n",
        "missing_values_table.columns = ['Column', 'Missing Values']\n",
        "missing_values_table['% of Total Values'] = (missing_values_table['Missing Values'] / len(train)) * 100\n",
        "\n",
        "print(missing_values_table['% of Total Values'])"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "WWzL-Oe-EILC",
        "outputId": "1118d6bb-f44f-4b4a-a0b4-f4b4bdcef6cf"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "0      0.000000\n",
            "1      0.000000\n",
            "2     17.739726\n",
            "3      0.000000\n",
            "4      0.000000\n",
            "        ...    \n",
            "75     0.000000\n",
            "76     0.000000\n",
            "77     0.000000\n",
            "78     0.000000\n",
            "79     0.000000\n",
            "Name: % of Total Values, Length: 80, dtype: float64\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "a = list(missing_values_table['% of Total Values'])\n",
        "num = []\n",
        "cat = []\n",
        "drp = []\n",
        "for i in range(len(columns)):\n",
        "    if missing_values_table['% of Total Values'].iloc[i] > 0:\n",
        "        if missing_values_table['% of Total Values'].iloc[i] > 40:\n",
        "            drp.append(str(missing_values_table['Column'].iloc[i]))\n",
        "        elif str(missing_values_table['Column'].iloc[i]) in categorical_features:\n",
        "            print(missing_values_table['Column'].iloc[i], missing_values_table['% of Total Values'].iloc[i], \"CATEGORICAL\")\n",
        "            cat.append(str(missing_values_table['Column'].iloc[i]))\n",
        "        else:\n",
        "            print(missing_values_table['Column'].iloc[i], missing_values_table['% of Total Values'].iloc[i])\n",
        "            num.append(str(missing_values_table['Column'].iloc[i]))\n",
        "\n",
        "print(drp)\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "boRDfj2FE3MJ",
        "outputId": "cde30a7f-cf35-48ce-8801-6d8ce7453020"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "LotFrontage 17.73972602739726\n",
            "MasVnrArea 0.547945205479452\n",
            "BsmtQual 2.5342465753424657 CATEGORICAL\n",
            "BsmtCond 2.5342465753424657 CATEGORICAL\n",
            "BsmtExposure 2.6027397260273974 CATEGORICAL\n",
            "BsmtFinType1 2.5342465753424657 CATEGORICAL\n",
            "BsmtFinType2 2.6027397260273974 CATEGORICAL\n",
            "Electrical 0.0684931506849315 CATEGORICAL\n",
            "GarageType 5.5479452054794525 CATEGORICAL\n",
            "GarageYrBlt 5.5479452054794525\n",
            "GarageFinish 5.5479452054794525 CATEGORICAL\n",
            "GarageQual 5.5479452054794525 CATEGORICAL\n",
            "GarageCond 5.5479452054794525 CATEGORICAL\n",
            "['Alley', 'MasVnrType', 'FireplaceQu', 'PoolQC', 'Fence', 'MiscFeature']\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "train = train.drop(drp, axis = 1)\n",
        "\n",
        "imputer = SimpleImputer(strategy='mean')\n",
        "train[num] = imputer.fit_transform(train[num])\n",
        "\n",
        "imputer_cat = SimpleImputer(strategy='most_frequent')\n",
        "train[cat] = imputer_cat.fit_transform(train[cat])"
      ],
      "metadata": {
        "id": "XoJkLYjGqzIM"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "y = train['SalePrice']\n",
        "x = train.drop('SalePrice', axis =1)\n",
        "\n",
        "train_x, val_x, train_y, val_y = train_test_split(x, y, test_size=0.2)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 224
        },
        "id": "fjfWVR3__nMr",
        "outputId": "a6688bbb-d91e-473d-e0e2-c2810100ce5f"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "      MSSubClass MSZoning  LotFrontage  LotArea Street LotShape LandContour  \\\n",
              "Id                                                                            \n",
              "973          120       RL         55.0     7892   Pave      Reg         Lvl   \n",
              "805           20       RL         75.0     9000   Pave      Reg         Lvl   \n",
              "1190          60       RL         60.0     7500   Pave      Reg         Lvl   \n",
              "\n",
              "     Utilities LotConfig LandSlope  ... OpenPorchSF EnclosedPorch 3SsnPorch  \\\n",
              "Id                                  ...                                       \n",
              "973     AllPub    Inside       Gtl  ...           0             0         0   \n",
              "805     AllPub    Inside       Gtl  ...           0             0         0   \n",
              "1190    AllPub    Inside       Gtl  ...          60             0         0   \n",
              "\n",
              "     ScreenPorch PoolArea  MiscVal  MoSold  YrSold  SaleType SaleCondition  \n",
              "Id                                                                          \n",
              "973            0        0        0       4    2010        WD        Normal  \n",
              "805            0        0        0       6    2006        WD        Family  \n",
              "1190           0        0        0       6    2010        WD        Normal  \n",
              "\n",
              "[3 rows x 73 columns]"
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-44a2a813-8610-481e-9c96-8f411f4482b8\" class=\"colab-df-container\">\n",
              "    <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>MSSubClass</th>\n",
              "      <th>MSZoning</th>\n",
              "      <th>LotFrontage</th>\n",
              "      <th>LotArea</th>\n",
              "      <th>Street</th>\n",
              "      <th>LotShape</th>\n",
              "      <th>LandContour</th>\n",
              "      <th>Utilities</th>\n",
              "      <th>LotConfig</th>\n",
              "      <th>LandSlope</th>\n",
              "      <th>...</th>\n",
              "      <th>OpenPorchSF</th>\n",
              "      <th>EnclosedPorch</th>\n",
              "      <th>3SsnPorch</th>\n",
              "      <th>ScreenPorch</th>\n",
              "      <th>PoolArea</th>\n",
              "      <th>MiscVal</th>\n",
              "      <th>MoSold</th>\n",
              "      <th>YrSold</th>\n",
              "      <th>SaleType</th>\n",
              "      <th>SaleCondition</th>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>Id</th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>973</th>\n",
              "      <td>120</td>\n",
              "      <td>RL</td>\n",
              "      <td>55.0</td>\n",
              "      <td>7892</td>\n",
              "      <td>Pave</td>\n",
              "      <td>Reg</td>\n",
              "      <td>Lvl</td>\n",
              "      <td>AllPub</td>\n",
              "      <td>Inside</td>\n",
              "      <td>Gtl</td>\n",
              "      <td>...</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>4</td>\n",
              "      <td>2010</td>\n",
              "      <td>WD</td>\n",
              "      <td>Normal</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>805</th>\n",
              "      <td>20</td>\n",
              "      <td>RL</td>\n",
              "      <td>75.0</td>\n",
              "      <td>9000</td>\n",
              "      <td>Pave</td>\n",
              "      <td>Reg</td>\n",
              "      <td>Lvl</td>\n",
              "      <td>AllPub</td>\n",
              "      <td>Inside</td>\n",
              "      <td>Gtl</td>\n",
              "      <td>...</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>6</td>\n",
              "      <td>2006</td>\n",
              "      <td>WD</td>\n",
              "      <td>Family</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1190</th>\n",
              "      <td>60</td>\n",
              "      <td>RL</td>\n",
              "      <td>60.0</td>\n",
              "      <td>7500</td>\n",
              "      <td>Pave</td>\n",
              "      <td>Reg</td>\n",
              "      <td>Lvl</td>\n",
              "      <td>AllPub</td>\n",
              "      <td>Inside</td>\n",
              "      <td>Gtl</td>\n",
              "      <td>...</td>\n",
              "      <td>60</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>6</td>\n",
              "      <td>2010</td>\n",
              "      <td>WD</td>\n",
              "      <td>Normal</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "<p>3 rows × 73 columns</p>\n",
              "</div>\n",
              "    <div class=\"colab-df-buttons\">\n",
              "\n",
              "  <div class=\"colab-df-container\">\n",
              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-44a2a813-8610-481e-9c96-8f411f4482b8')\"\n",
              "            title=\"Convert this dataframe to an interactive table.\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
              "  </svg>\n",
              "    </button>\n",
              "\n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    .colab-df-buttons div {\n",
              "      margin-bottom: 4px;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "    <script>\n",
              "      const buttonEl =\n",
              "        document.querySelector('#df-44a2a813-8610-481e-9c96-8f411f4482b8 button.colab-df-convert');\n",
              "      buttonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "      async function convertToInteractive(key) {\n",
              "        const element = document.querySelector('#df-44a2a813-8610-481e-9c96-8f411f4482b8');\n",
              "        const dataTable =\n",
              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                    [key], {});\n",
              "        if (!dataTable) return;\n",
              "\n",
              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "          + ' to learn more about interactive tables.';\n",
              "        element.innerHTML = '';\n",
              "        dataTable['output_type'] = 'display_data';\n",
              "        await google.colab.output.renderOutput(dataTable, element);\n",
              "        const docLink = document.createElement('div');\n",
              "        docLink.innerHTML = docLinkHtml;\n",
              "        element.appendChild(docLink);\n",
              "      }\n",
              "    </script>\n",
              "  </div>\n",
              "\n",
              "\n",
              "<div id=\"df-060439c1-fb83-4c5c-9edd-ea3927f8e36d\">\n",
              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-060439c1-fb83-4c5c-9edd-ea3927f8e36d')\"\n",
              "            title=\"Suggest charts\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "     width=\"24px\">\n",
              "    <g>\n",
              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
              "    </g>\n",
              "</svg>\n",
              "  </button>\n",
              "\n",
              "<style>\n",
              "  .colab-df-quickchart {\n",
              "      --bg-color: #E8F0FE;\n",
              "      --fill-color: #1967D2;\n",
              "      --hover-bg-color: #E2EBFA;\n",
              "      --hover-fill-color: #174EA6;\n",
              "      --disabled-fill-color: #AAA;\n",
              "      --disabled-bg-color: #DDD;\n",
              "  }\n",
              "\n",
              "  [theme=dark] .colab-df-quickchart {\n",
              "      --bg-color: #3B4455;\n",
              "      --fill-color: #D2E3FC;\n",
              "      --hover-bg-color: #434B5C;\n",
              "      --hover-fill-color: #FFFFFF;\n",
              "      --disabled-bg-color: #3B4455;\n",
              "      --disabled-fill-color: #666;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart {\n",
              "    background-color: var(--bg-color);\n",
              "    border: none;\n",
              "    border-radius: 50%;\n",
              "    cursor: pointer;\n",
              "    display: none;\n",
              "    fill: var(--fill-color);\n",
              "    height: 32px;\n",
              "    padding: 0;\n",
              "    width: 32px;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart:hover {\n",
              "    background-color: var(--hover-bg-color);\n",
              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "    fill: var(--button-hover-fill-color);\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart-complete:disabled,\n",
              "  .colab-df-quickchart-complete:disabled:hover {\n",
              "    background-color: var(--disabled-bg-color);\n",
              "    fill: var(--disabled-fill-color);\n",
              "    box-shadow: none;\n",
              "  }\n",
              "\n",
              "  .colab-df-spinner {\n",
              "    border: 2px solid var(--fill-color);\n",
              "    border-color: transparent;\n",
              "    border-bottom-color: var(--fill-color);\n",
              "    animation:\n",
              "      spin 1s steps(1) infinite;\n",
              "  }\n",
              "\n",
              "  @keyframes spin {\n",
              "    0% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "      border-left-color: var(--fill-color);\n",
              "    }\n",
              "    20% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    30% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    40% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    60% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    80% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "    90% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "  }\n",
              "</style>\n",
              "\n",
              "  <script>\n",
              "    async function quickchart(key) {\n",
              "      const quickchartButtonEl =\n",
              "        document.querySelector('#' + key + ' button');\n",
              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
              "      try {\n",
              "        const charts = await google.colab.kernel.invokeFunction(\n",
              "            'suggestCharts', [key], {});\n",
              "      } catch (error) {\n",
              "        console.error('Error during call to suggestCharts:', error);\n",
              "      }\n",
              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
              "    }\n",
              "    (() => {\n",
              "      let quickchartButtonEl =\n",
              "        document.querySelector('#df-060439c1-fb83-4c5c-9edd-ea3927f8e36d button');\n",
              "      quickchartButtonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "    })();\n",
              "  </script>\n",
              "</div>\n",
              "    </div>\n",
              "  </div>\n"
            ],
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "dataframe",
              "variable_name": "train_x"
            }
          },
          "metadata": {},
          "execution_count": 8
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "columns = list(train_x.columns)\n",
        "categorical_features = []\n",
        "for i in columns:\n",
        "    if str(type(train[i].iloc[0])) == \"<class 'str'>\":\n",
        "        categorical_features.append(i)"
      ],
      "metadata": {
        "id": "aGT0YgWAyag6"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "def objective(trial):\n",
        "    params = {\n",
        "        \"iterations\": trial.suggest_int(\"iterations\", 500, 1000),\n",
        "        \"depth\": trial.suggest_int(\"depth\", 3, 6),\n",
        "        \"learning_rate\": trial.suggest_float(\"learning_rate\", 0.01, 0.3, log=True),\n",
        "        \"l2_leaf_reg\": trial.suggest_float(\"l2_leaf_reg\", 1, 5, log=True),\n",
        "        \"loss_function\": \"RMSE\",\n",
        "        \"eval_metric\": \"RMSE\",\n",
        "        \"random_seed\": 42,\n",
        "        \"verbose\": 0\n",
        "    }\n",
        "\n",
        "    model = CatBoostRegressor(**params)\n",
        "    model.fit(train_x, train_y, eval_set=(val_x, val_y), verbose=1000, cat_features=categorical_features)\n",
        "\n",
        "    preds = model.predict(val_x)\n",
        "    rmse = mean_squared_error(val_y, preds)\n",
        "    rmse = np.sqrt(rmse)\n",
        "    return rmse\n",
        "\n",
        "study = optuna.create_study(direction=\"minimize\")\n",
        "study.optimize(objective, n_trials=20)  # Количество итераций\n",
        "\n",
        "# 📌 6. Обучение модели с лучшими параметрами\n",
        "best_params = study.best_params\n",
        "best_model = CatBoostRegressor(**best_params)\n",
        "best_model.fit(train_x, train_y, eval_set=(val_x, val_y), verbose=100, cat_features=categorical_features)\n",
        "\n",
        "# 📌 7. Оценка модели\n",
        "final_preds = best_model.predict(val_x)\n",
        "rmse = mean_squared_error(val_y, final_preds)\n",
        "rmse = np.sqrt(rmse)\n",
        "print(f\"Final RMSE: {rmse}\")"
      ],
      "metadata": {
        "id": "znndOvytsZjD",
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "outputId": "d89b6f06-5024-4d49-b9e5-3c4230f75ffc"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-22 13:31:59,003] A new study created in memory with name: no-name-58f52034-88e2-4d88-946d-5248e270279f\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "0:\tlearn: 78333.0649674\ttest: 79904.7958863\tbest: 79904.7958863 (0)\ttotal: 27.3ms\tremaining: 18s\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-22 13:32:11,019] Trial 0 finished with value: 27590.170657313705 and parameters: {'iterations': 660, 'depth': 5, 'learning_rate': 0.016357363220883117, 'l2_leaf_reg': 1.0467755164243835}. Best is trial 0 with value: 27590.170657313705.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "659:\tlearn: 15502.3089534\ttest: 27590.1706573\tbest: 27590.1706573 (659)\ttotal: 11.8s\tremaining: 0us\n",
            "\n",
            "bestTest = 27590.17066\n",
            "bestIteration = 659\n",
            "\n",
            "0:\tlearn: 78018.7607019\ttest: 79644.0190750\tbest: 79644.0190750 (0)\ttotal: 19.2ms\tremaining: 18.9s\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-22 13:32:31,044] Trial 1 finished with value: 28054.269444924557 and parameters: {'iterations': 983, 'depth': 6, 'learning_rate': 0.023902850062876548, 'l2_leaf_reg': 3.5310269432730204}. Best is trial 0 with value: 27590.170657313705.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "982:\tlearn: 11591.3196453\ttest: 28054.5273026\tbest: 28054.2694449 (980)\ttotal: 19.9s\tremaining: 0us\n",
            "\n",
            "bestTest = 28054.26944\n",
            "bestIteration = 980\n",
            "\n",
            "Shrink model to first 981 iterations.\n",
            "0:\tlearn: 75845.8232014\ttest: 77563.7878037\tbest: 77563.7878037 (0)\ttotal: 5.46ms\tremaining: 5.36s\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-22 13:32:36,495] Trial 2 finished with value: 28698.64513756474 and parameters: {'iterations': 982, 'depth': 3, 'learning_rate': 0.07393437088919934, 'l2_leaf_reg': 4.407243596530369}. Best is trial 0 with value: 27590.170657313705.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "981:\tlearn: 12432.3504777\ttest: 28710.5183190\tbest: 28698.6451376 (980)\ttotal: 5.33s\tremaining: 0us\n",
            "\n",
            "bestTest = 28698.64514\n",
            "bestIteration = 980\n",
            "\n",
            "Shrink model to first 981 iterations.\n",
            "0:\tlearn: 74955.9690638\ttest: 76649.8689567\tbest: 76649.8689567 (0)\ttotal: 7.68ms\tremaining: 3.96s\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-22 13:32:41,288] Trial 3 finished with value: 27869.917555899843 and parameters: {'iterations': 517, 'depth': 4, 'learning_rate': 0.09140291137980973, 'l2_leaf_reg': 1.8278943374326093}. Best is trial 0 with value: 27590.170657313705.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "516:\tlearn: 10760.0069501\ttest: 27872.5799946\tbest: 27869.9175559 (514)\ttotal: 4.7s\tremaining: 0us\n",
            "\n",
            "bestTest = 27869.91756\n",
            "bestIteration = 514\n",
            "\n",
            "Shrink model to first 515 iterations.\n",
            "0:\tlearn: 75902.0846134\ttest: 77618.1762520\tbest: 77618.1762520 (0)\ttotal: 5.85ms\tremaining: 5.36s\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-22 13:32:45,628] Trial 4 finished with value: 29733.270157741175 and parameters: {'iterations': 917, 'depth': 3, 'learning_rate': 0.07192589301350974, 'l2_leaf_reg': 3.2029754979257365}. Best is trial 0 with value: 27590.170657313705.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "916:\tlearn: 12609.4158087\ttest: 29733.2701577\tbest: 29733.2701577 (916)\ttotal: 4.23s\tremaining: 0us\n",
            "\n",
            "bestTest = 29733.27016\n",
            "bestIteration = 916\n",
            "\n",
            "0:\tlearn: 75582.1119423\ttest: 77314.1873521\tbest: 77314.1873521 (0)\ttotal: 5.75ms\tremaining: 3.94s\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-22 13:32:49,583] Trial 5 finished with value: 27968.432737151175 and parameters: {'iterations': 686, 'depth': 3, 'learning_rate': 0.07995901292964724, 'l2_leaf_reg': 4.308480258560373}. Best is trial 0 with value: 27590.170657313705.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "685:\tlearn: 14109.0275208\ttest: 27971.4125965\tbest: 27968.4327372 (682)\ttotal: 3.85s\tremaining: 0us\n",
            "\n",
            "bestTest = 27968.43274\n",
            "bestIteration = 682\n",
            "\n",
            "Shrink model to first 683 iterations.\n",
            "0:\tlearn: 75970.1743846\ttest: 77762.0780575\tbest: 77762.0780575 (0)\ttotal: 14.7ms\tremaining: 8.91s\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-22 13:33:01,983] Trial 6 finished with value: 25505.915021309767 and parameters: {'iterations': 607, 'depth': 6, 'learning_rate': 0.0677948134301802, 'l2_leaf_reg': 2.0814646273997335}. Best is trial 6 with value: 25505.915021309767.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "606:\tlearn: 7610.4218093\ttest: 25505.9150213\tbest: 25505.9150213 (606)\ttotal: 12.3s\tremaining: 0us\n",
            "\n",
            "bestTest = 25505.91502\n",
            "bestIteration = 606\n",
            "\n",
            "0:\tlearn: 78495.5523435\ttest: 80059.4150816\tbest: 80059.4150816 (0)\ttotal: 7.88ms\tremaining: 6.68s\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-22 13:33:08,075] Trial 7 finished with value: 29586.253948823174 and parameters: {'iterations': 848, 'depth': 4, 'learning_rate': 0.013374392400396598, 'l2_leaf_reg': 2.5661120089875267}. Best is trial 6 with value: 25505.915021309767.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "847:\tlearn: 18062.8046939\ttest: 29605.7690621\tbest: 29586.2539488 (845)\ttotal: 5.97s\tremaining: 0us\n",
            "\n",
            "bestTest = 29586.25395\n",
            "bestIteration = 845\n",
            "\n",
            "Shrink model to first 846 iterations.\n",
            "0:\tlearn: 73604.1185921\ttest: 75594.7388721\tbest: 75594.7388721 (0)\ttotal: 16.3ms\tremaining: 14.4s\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-22 13:33:29,081] Trial 8 finished with value: 25942.63417941992 and parameters: {'iterations': 882, 'depth': 6, 'learning_rate': 0.11850075769005632, 'l2_leaf_reg': 1.2815307519387589}. Best is trial 6 with value: 25505.915021309767.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "881:\tlearn: 2670.1533352\ttest: 25988.0585841\tbest: 25942.6341794 (629)\ttotal: 20.8s\tremaining: 0us\n",
            "\n",
            "bestTest = 25942.63418\n",
            "bestIteration = 629\n",
            "\n",
            "Shrink model to first 630 iterations.\n",
            "0:\tlearn: 70492.3985001\ttest: 72724.8985510\tbest: 72724.8985510 (0)\ttotal: 15.7ms\tremaining: 14.4s\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-22 13:33:50,797] Trial 9 finished with value: 28116.9240097469 and parameters: {'iterations': 920, 'depth': 6, 'learning_rate': 0.18979605485431683, 'l2_leaf_reg': 1.6195835013784619}. Best is trial 6 with value: 25505.915021309767.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "919:\tlearn: 1285.3683811\ttest: 28119.6568015\tbest: 28116.9240097 (912)\ttotal: 21.6s\tremaining: 0us\n",
            "\n",
            "bestTest = 28116.92401\n",
            "bestIteration = 912\n",
            "\n",
            "Shrink model to first 913 iterations.\n",
            "0:\tlearn: 77376.3347012\ttest: 78983.1492222\tbest: 78983.1492222 (0)\ttotal: 10.7ms\tremaining: 5.35s\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-22 13:33:57,061] Trial 10 finished with value: 28578.426794142797 and parameters: {'iterations': 500, 'depth': 5, 'learning_rate': 0.037494437856804716, 'l2_leaf_reg': 2.120553073873791}. Best is trial 6 with value: 25505.915021309767.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "499:\tlearn: 13757.2642000\ttest: 28583.2570555\tbest: 28578.4267941 (496)\ttotal: 6.15s\tremaining: 0us\n",
            "\n",
            "bestTest = 28578.42679\n",
            "bestIteration = 496\n",
            "\n",
            "Shrink model to first 497 iterations.\n",
            "0:\tlearn: 71124.7124091\ttest: 73315.6531358\tbest: 73315.6531358 (0)\ttotal: 15.2ms\tremaining: 12.1s\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-22 13:34:12,433] Trial 11 finished with value: 25654.218131104917 and parameters: {'iterations': 794, 'depth': 6, 'learning_rate': 0.174045018590583, 'l2_leaf_reg': 1.2553559685810602}. Best is trial 6 with value: 25505.915021309767.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "793:\tlearn: 1806.1983257\ttest: 25665.0388059\tbest: 25654.2181311 (748)\ttotal: 15.2s\tremaining: 0us\n",
            "\n",
            "bestTest = 25654.21813\n",
            "bestIteration = 748\n",
            "\n",
            "Shrink model to first 749 iterations.\n",
            "0:\tlearn: 65965.9525773\ttest: 67916.9191772\tbest: 67916.9191772 (0)\ttotal: 11ms\tremaining: 8.49s\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-22 13:34:21,637] Trial 12 finished with value: 27568.089228145127 and parameters: {'iterations': 772, 'depth': 5, 'learning_rate': 0.29984336832096536, 'l2_leaf_reg': 1.4187464720313705}. Best is trial 6 with value: 25505.915021309767.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "771:\tlearn: 2005.5261278\ttest: 27917.9813807\tbest: 27568.0892281 (122)\ttotal: 9.07s\tremaining: 0us\n",
            "\n",
            "bestTest = 27568.08923\n",
            "bestIteration = 122\n",
            "\n",
            "Shrink model to first 123 iterations.\n",
            "0:\tlearn: 71830.7050936\ttest: 73968.8924491\tbest: 73968.8924491 (0)\ttotal: 18.1ms\tremaining: 10.6s\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-22 13:34:33,113] Trial 13 finished with value: 27783.988974347536 and parameters: {'iterations': 584, 'depth': 6, 'learning_rate': 0.15753043024546495, 'l2_leaf_reg': 1.0844493978212573}. Best is trial 6 with value: 25505.915021309767.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "583:\tlearn: 2932.3559304\ttest: 27843.9749369\tbest: 27783.9889743 (530)\ttotal: 11.3s\tremaining: 0us\n",
            "\n",
            "bestTest = 27783.98897\n",
            "bestIteration = 530\n",
            "\n",
            "Shrink model to first 531 iterations.\n",
            "0:\tlearn: 77459.1675696\ttest: 79062.3182394\tbest: 79062.3182394 (0)\ttotal: 13.9ms\tremaining: 10.6s\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-22 13:34:41,591] Trial 14 finished with value: 26010.87795996257 and parameters: {'iterations': 765, 'depth': 5, 'learning_rate': 0.035862946535418744, 'l2_leaf_reg': 2.3974801719363676}. Best is trial 6 with value: 25505.915021309767.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "764:\tlearn: 11631.4998202\ttest: 26010.9213248\tbest: 26010.8779600 (763)\ttotal: 8.32s\tremaining: 0us\n",
            "\n",
            "bestTest = 26010.87796\n",
            "bestIteration = 763\n",
            "\n",
            "Shrink model to first 764 iterations.\n",
            "0:\tlearn: 67429.2452416\ttest: 69891.3553373\tbest: 69891.3553373 (0)\ttotal: 33.2ms\tremaining: 22s\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-22 13:34:55,343] Trial 15 finished with value: 28138.792254996108 and parameters: {'iterations': 663, 'depth': 6, 'learning_rate': 0.2632530567491658, 'l2_leaf_reg': 1.8431661317560617}. Best is trial 6 with value: 25505.915021309767.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "662:\tlearn: 1571.5532066\ttest: 28146.6342828\tbest: 28138.7922550 (556)\ttotal: 13.6s\tremaining: 0us\n",
            "\n",
            "bestTest = 28138.79225\n",
            "bestIteration = 556\n",
            "\n",
            "Shrink model to first 557 iterations.\n",
            "0:\tlearn: 77293.1263869\ttest: 78982.5109645\tbest: 78982.5109645 (0)\ttotal: 15.7ms\tremaining: 12.7s\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-22 13:35:11,098] Trial 16 finished with value: 26805.860728204792 and parameters: {'iterations': 811, 'depth': 6, 'learning_rate': 0.038422029485352796, 'l2_leaf_reg': 1.3242863864510208}. Best is trial 6 with value: 25505.915021309767.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "810:\tlearn: 8503.8489310\ttest: 26808.4863444\tbest: 26805.8607282 (780)\ttotal: 15.6s\tremaining: 0us\n",
            "\n",
            "bestTest = 26805.86073\n",
            "bestIteration = 780\n",
            "\n",
            "Shrink model to first 781 iterations.\n",
            "0:\tlearn: 72807.3168340\ttest: 74567.4359805\tbest: 74567.4359805 (0)\ttotal: 8.14ms\tremaining: 4.83s\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-22 13:35:16,427] Trial 17 finished with value: 27731.2561681143 and parameters: {'iterations': 594, 'depth': 4, 'learning_rate': 0.14231687844310859, 'l2_leaf_reg': 2.7513555972307113}. Best is trial 6 with value: 25505.915021309767.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "593:\tlearn: 8708.7251480\ttest: 27745.5415055\tbest: 27731.2561681 (592)\ttotal: 5.21s\tremaining: 0us\n",
            "\n",
            "bestTest = 27731.25617\n",
            "bestIteration = 592\n",
            "\n",
            "Shrink model to first 593 iterations.\n",
            "0:\tlearn: 69632.6423041\ttest: 71491.3158993\tbest: 71491.3158993 (0)\ttotal: 11.4ms\tremaining: 8.13s\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-22 13:35:24,653] Trial 18 finished with value: 28562.629881846802 and parameters: {'iterations': 712, 'depth': 5, 'learning_rate': 0.21317542189811298, 'l2_leaf_reg': 1.9932455456134346}. Best is trial 6 with value: 25505.915021309767.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "711:\tlearn: 3451.4434144\ttest: 28588.2419094\tbest: 28562.6298818 (662)\ttotal: 8.09s\tremaining: 0us\n",
            "\n",
            "bestTest = 28562.62988\n",
            "bestIteration = 662\n",
            "\n",
            "Shrink model to first 663 iterations.\n",
            "0:\tlearn: 76839.6083414\ttest: 78564.7956370\tbest: 78564.7956370 (0)\ttotal: 15.2ms\tremaining: 9.2s\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-22 13:35:36,463] Trial 19 finished with value: 27636.592120178866 and parameters: {'iterations': 608, 'depth': 6, 'learning_rate': 0.04833367065674872, 'l2_leaf_reg': 1.5504919469866252}. Best is trial 6 with value: 25505.915021309767.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "607:\tlearn: 9007.5238814\ttest: 27636.5921202\tbest: 27636.5921202 (607)\ttotal: 11.7s\tremaining: 0us\n",
            "\n",
            "bestTest = 27636.59212\n",
            "bestIteration = 607\n",
            "\n",
            "0:\tlearn: 75633.7775578\ttest: 77511.7476552\tbest: 77511.7476552 (0)\ttotal: 14.1ms\tremaining: 8.56s\n",
            "100:\tlearn: 18169.0442353\ttest: 31046.3187900\tbest: 31046.3187900 (100)\ttotal: 2.28s\tremaining: 11.4s\n",
            "200:\tlearn: 14158.4309190\ttest: 29124.2526013\tbest: 29124.2526013 (200)\ttotal: 4.18s\tremaining: 8.45s\n",
            "300:\tlearn: 11582.0708109\ttest: 28465.6510796\tbest: 28465.6510796 (300)\ttotal: 6.09s\tremaining: 6.19s\n",
            "400:\tlearn: 9875.7318011\ttest: 28048.5426547\tbest: 28043.2554774 (398)\ttotal: 7.97s\tremaining: 4.09s\n",
            "500:\tlearn: 8681.9566210\ttest: 27841.4177588\tbest: 27840.8079547 (493)\ttotal: 9.94s\tremaining: 2.1s\n",
            "600:\tlearn: 7654.3280918\ttest: 27619.5164463\tbest: 27609.0403144 (598)\ttotal: 11.8s\tremaining: 118ms\n",
            "606:\tlearn: 7594.7749448\ttest: 27610.8376438\tbest: 27609.0403144 (598)\ttotal: 11.9s\tremaining: 0us\n",
            "\n",
            "bestTest = 27609.04031\n",
            "bestIteration = 598\n",
            "\n",
            "Shrink model to first 599 iterations.\n",
            "Final RMSE: 27609.040314427017\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "print(best_params)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "RjnaYan5DT38",
        "outputId": "16e18b21-7947-496a-900d-4eaf300fe889"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "{'iterations': 607, 'depth': 6, 'learning_rate': 0.0677948134301802, 'l2_leaf_reg': 2.0814646273997335}\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "test = pd.read_csv('/content/test (3).csv')\n",
        "test.head(3)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 193
        },
        "id": "6wFXbBuh3A_A",
        "outputId": "ae1e95e7-d685-4451-d328-98ec736045c4"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "     Id  MSSubClass MSZoning  LotFrontage  LotArea Street Alley LotShape  \\\n",
              "0  1461          20       RH         80.0    11622   Pave   NaN      Reg   \n",
              "1  1462          20       RL         81.0    14267   Pave   NaN      IR1   \n",
              "2  1463          60       RL         74.0    13830   Pave   NaN      IR1   \n",
              "\n",
              "  LandContour Utilities  ... ScreenPorch PoolArea PoolQC  Fence MiscFeature  \\\n",
              "0         Lvl    AllPub  ...         120        0    NaN  MnPrv         NaN   \n",
              "1         Lvl    AllPub  ...           0        0    NaN    NaN        Gar2   \n",
              "2         Lvl    AllPub  ...           0        0    NaN  MnPrv         NaN   \n",
              "\n",
              "  MiscVal MoSold  YrSold  SaleType  SaleCondition  \n",
              "0       0      6    2010        WD         Normal  \n",
              "1   12500      6    2010        WD         Normal  \n",
              "2       0      3    2010        WD         Normal  \n",
              "\n",
              "[3 rows x 80 columns]"
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-4cb2ad85-a729-43e5-8ee5-e2dab9cf055f\" class=\"colab-df-container\">\n",
              "    <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>Id</th>\n",
              "      <th>MSSubClass</th>\n",
              "      <th>MSZoning</th>\n",
              "      <th>LotFrontage</th>\n",
              "      <th>LotArea</th>\n",
              "      <th>Street</th>\n",
              "      <th>Alley</th>\n",
              "      <th>LotShape</th>\n",
              "      <th>LandContour</th>\n",
              "      <th>Utilities</th>\n",
              "      <th>...</th>\n",
              "      <th>ScreenPorch</th>\n",
              "      <th>PoolArea</th>\n",
              "      <th>PoolQC</th>\n",
              "      <th>Fence</th>\n",
              "      <th>MiscFeature</th>\n",
              "      <th>MiscVal</th>\n",
              "      <th>MoSold</th>\n",
              "      <th>YrSold</th>\n",
              "      <th>SaleType</th>\n",
              "      <th>SaleCondition</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>1461</td>\n",
              "      <td>20</td>\n",
              "      <td>RH</td>\n",
              "      <td>80.0</td>\n",
              "      <td>11622</td>\n",
              "      <td>Pave</td>\n",
              "      <td>NaN</td>\n",
              "      <td>Reg</td>\n",
              "      <td>Lvl</td>\n",
              "      <td>AllPub</td>\n",
              "      <td>...</td>\n",
              "      <td>120</td>\n",
              "      <td>0</td>\n",
              "      <td>NaN</td>\n",
              "      <td>MnPrv</td>\n",
              "      <td>NaN</td>\n",
              "      <td>0</td>\n",
              "      <td>6</td>\n",
              "      <td>2010</td>\n",
              "      <td>WD</td>\n",
              "      <td>Normal</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>1462</td>\n",
              "      <td>20</td>\n",
              "      <td>RL</td>\n",
              "      <td>81.0</td>\n",
              "      <td>14267</td>\n",
              "      <td>Pave</td>\n",
              "      <td>NaN</td>\n",
              "      <td>IR1</td>\n",
              "      <td>Lvl</td>\n",
              "      <td>AllPub</td>\n",
              "      <td>...</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>Gar2</td>\n",
              "      <td>12500</td>\n",
              "      <td>6</td>\n",
              "      <td>2010</td>\n",
              "      <td>WD</td>\n",
              "      <td>Normal</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>1463</td>\n",
              "      <td>60</td>\n",
              "      <td>RL</td>\n",
              "      <td>74.0</td>\n",
              "      <td>13830</td>\n",
              "      <td>Pave</td>\n",
              "      <td>NaN</td>\n",
              "      <td>IR1</td>\n",
              "      <td>Lvl</td>\n",
              "      <td>AllPub</td>\n",
              "      <td>...</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>NaN</td>\n",
              "      <td>MnPrv</td>\n",
              "      <td>NaN</td>\n",
              "      <td>0</td>\n",
              "      <td>3</td>\n",
              "      <td>2010</td>\n",
              "      <td>WD</td>\n",
              "      <td>Normal</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "<p>3 rows × 80 columns</p>\n",
              "</div>\n",
              "    <div class=\"colab-df-buttons\">\n",
              "\n",
              "  <div class=\"colab-df-container\">\n",
              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-4cb2ad85-a729-43e5-8ee5-e2dab9cf055f')\"\n",
              "            title=\"Convert this dataframe to an interactive table.\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
              "  </svg>\n",
              "    </button>\n",
              "\n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    .colab-df-buttons div {\n",
              "      margin-bottom: 4px;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "    <script>\n",
              "      const buttonEl =\n",
              "        document.querySelector('#df-4cb2ad85-a729-43e5-8ee5-e2dab9cf055f button.colab-df-convert');\n",
              "      buttonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "      async function convertToInteractive(key) {\n",
              "        const element = document.querySelector('#df-4cb2ad85-a729-43e5-8ee5-e2dab9cf055f');\n",
              "        const dataTable =\n",
              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                    [key], {});\n",
              "        if (!dataTable) return;\n",
              "\n",
              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "          + ' to learn more about interactive tables.';\n",
              "        element.innerHTML = '';\n",
              "        dataTable['output_type'] = 'display_data';\n",
              "        await google.colab.output.renderOutput(dataTable, element);\n",
              "        const docLink = document.createElement('div');\n",
              "        docLink.innerHTML = docLinkHtml;\n",
              "        element.appendChild(docLink);\n",
              "      }\n",
              "    </script>\n",
              "  </div>\n",
              "\n",
              "\n",
              "<div id=\"df-41af24b2-08f1-4f5c-845d-777b8fb04e72\">\n",
              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-41af24b2-08f1-4f5c-845d-777b8fb04e72')\"\n",
              "            title=\"Suggest charts\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "     width=\"24px\">\n",
              "    <g>\n",
              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
              "    </g>\n",
              "</svg>\n",
              "  </button>\n",
              "\n",
              "<style>\n",
              "  .colab-df-quickchart {\n",
              "      --bg-color: #E8F0FE;\n",
              "      --fill-color: #1967D2;\n",
              "      --hover-bg-color: #E2EBFA;\n",
              "      --hover-fill-color: #174EA6;\n",
              "      --disabled-fill-color: #AAA;\n",
              "      --disabled-bg-color: #DDD;\n",
              "  }\n",
              "\n",
              "  [theme=dark] .colab-df-quickchart {\n",
              "      --bg-color: #3B4455;\n",
              "      --fill-color: #D2E3FC;\n",
              "      --hover-bg-color: #434B5C;\n",
              "      --hover-fill-color: #FFFFFF;\n",
              "      --disabled-bg-color: #3B4455;\n",
              "      --disabled-fill-color: #666;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart {\n",
              "    background-color: var(--bg-color);\n",
              "    border: none;\n",
              "    border-radius: 50%;\n",
              "    cursor: pointer;\n",
              "    display: none;\n",
              "    fill: var(--fill-color);\n",
              "    height: 32px;\n",
              "    padding: 0;\n",
              "    width: 32px;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart:hover {\n",
              "    background-color: var(--hover-bg-color);\n",
              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "    fill: var(--button-hover-fill-color);\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart-complete:disabled,\n",
              "  .colab-df-quickchart-complete:disabled:hover {\n",
              "    background-color: var(--disabled-bg-color);\n",
              "    fill: var(--disabled-fill-color);\n",
              "    box-shadow: none;\n",
              "  }\n",
              "\n",
              "  .colab-df-spinner {\n",
              "    border: 2px solid var(--fill-color);\n",
              "    border-color: transparent;\n",
              "    border-bottom-color: var(--fill-color);\n",
              "    animation:\n",
              "      spin 1s steps(1) infinite;\n",
              "  }\n",
              "\n",
              "  @keyframes spin {\n",
              "    0% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "      border-left-color: var(--fill-color);\n",
              "    }\n",
              "    20% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    30% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    40% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    60% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    80% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "    90% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "  }\n",
              "</style>\n",
              "\n",
              "  <script>\n",
              "    async function quickchart(key) {\n",
              "      const quickchartButtonEl =\n",
              "        document.querySelector('#' + key + ' button');\n",
              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
              "      try {\n",
              "        const charts = await google.colab.kernel.invokeFunction(\n",
              "            'suggestCharts', [key], {});\n",
              "      } catch (error) {\n",
              "        console.error('Error during call to suggestCharts:', error);\n",
              "      }\n",
              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
              "    }\n",
              "    (() => {\n",
              "      let quickchartButtonEl =\n",
              "        document.querySelector('#df-41af24b2-08f1-4f5c-845d-777b8fb04e72 button');\n",
              "      quickchartButtonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "    })();\n",
              "  </script>\n",
              "</div>\n",
              "    </div>\n",
              "  </div>\n"
            ],
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "dataframe",
              "variable_name": "test"
            }
          },
          "metadata": {},
          "execution_count": 43
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "test = test.drop(drp, axis = 1)"
      ],
      "metadata": {
        "id": "gywjd_Mg6D6a"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "x = list(val_x.columns)\n",
        "y = list(test.columns)\n",
        "for i in y:\n",
        "    if i not in x:\n",
        "        print(i)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "3YbK3HHZ6EPl",
        "outputId": "ea20f06b-5170-4084-a6ed-2dd29efda442"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Id\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "missing_values_table = test.isna().sum().reset_index()\n",
        "missing_values_table.columns = ['Column', 'Missing Values']\n",
        "missing_values_table['% of Total Values'] = (missing_values_table['Missing Values'] / len(test)) * 100\n",
        "\n",
        "print(missing_values_table['% of Total Values'])"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "oUzMUS_e6_20",
        "outputId": "4653b080-5814-4867-ffca-7d1f6d19f50c"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "0     0.0\n",
            "1     0.0\n",
            "2     0.0\n",
            "3     0.0\n",
            "4     0.0\n",
            "     ... \n",
            "68    0.0\n",
            "69    0.0\n",
            "70    0.0\n",
            "71    0.0\n",
            "72    0.0\n",
            "Name: % of Total Values, Length: 73, dtype: float64\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "a = list(missing_values_table['% of Total Values'])\n",
        "num = []\n",
        "cat = []\n",
        "drp = []\n",
        "for i in range(len(columns)):\n",
        "    if missing_values_table['% of Total Values'].iloc[i] > 0:\n",
        "        if missing_values_table['% of Total Values'].iloc[i] > 40:\n",
        "            drp.append(str(missing_values_table['Column'].iloc[i]))\n",
        "        elif str(missing_values_table['Column'].iloc[i]) in categorical_features:\n",
        "            print(missing_values_table['Column'].iloc[i], missing_values_table['% of Total Values'].iloc[i], \"CATEGORICAL\")\n",
        "            cat.append(str(missing_values_table['Column'].iloc[i]))\n",
        "        else:\n",
        "            print(missing_values_table['Column'].iloc[i], missing_values_table['% of Total Values'].iloc[i])\n",
        "            num.append(str(missing_values_table['Column'].iloc[i]))"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "3x5eDmQk7djd",
        "outputId": "bff3649e-357f-4352-af6b-142a4e443774"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "MSZoning 0.2741603838245374 CATEGORICAL\n",
            "LotFrontage 15.558601782042494\n",
            "Utilities 0.1370801919122687 CATEGORICAL\n",
            "Exterior1st 0.06854009595613435 CATEGORICAL\n",
            "Exterior2nd 0.06854009595613435 CATEGORICAL\n",
            "MasVnrArea 1.0281014393420151\n",
            "BsmtQual 3.015764222069911 CATEGORICAL\n",
            "BsmtCond 3.0843043180260454 CATEGORICAL\n",
            "BsmtExposure 3.015764222069911 CATEGORICAL\n",
            "BsmtFinType1 2.878684030157642 CATEGORICAL\n",
            "BsmtFinSF1 0.06854009595613435\n",
            "BsmtFinType2 2.878684030157642 CATEGORICAL\n",
            "BsmtFinSF2 0.06854009595613435\n",
            "BsmtUnfSF 0.06854009595613435\n",
            "TotalBsmtSF 0.06854009595613435\n",
            "BsmtFullBath 0.1370801919122687\n",
            "BsmtHalfBath 0.1370801919122687\n",
            "KitchenQual 0.06854009595613435 CATEGORICAL\n",
            "Functional 0.1370801919122687 CATEGORICAL\n",
            "GarageType 5.20904729266621 CATEGORICAL\n",
            "GarageYrBlt 5.346127484578479\n",
            "GarageFinish 5.346127484578479 CATEGORICAL\n",
            "GarageCars 0.06854009595613435\n",
            "GarageArea 0.06854009595613435\n",
            "GarageQual 5.346127484578479 CATEGORICAL\n",
            "GarageCond 5.346127484578479 CATEGORICAL\n",
            "SaleType 0.06854009595613435 CATEGORICAL\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "imputer = SimpleImputer(strategy='mean')\n",
        "test[num] = imputer.fit_transform(test[num])\n",
        "\n",
        "imputer_cat = SimpleImputer(strategy='most_frequent')\n",
        "test[cat] = imputer_cat.fit_transform(test[cat])"
      ],
      "metadata": {
        "id": "mN0V3AR77rLz"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "final_preds = best_model.predict(test)"
      ],
      "metadata": {
        "id": "OfjqJ21s5HAe"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "final = pd.DataFrame({\n",
        "    \"Id\": test['Id'],\n",
        "    \"SalePrice\": final_preds\n",
        "})\n",
        "final.head(3)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 143
        },
        "id": "UcsIk_te3HBw",
        "outputId": "39b56c05-d2ff-419d-c2f4-e31af181fc37"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "     Id      SalePrice\n",
              "0  1461  116908.794116\n",
              "1  1462  163613.449249\n",
              "2  1463  184095.361491"
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-914cca30-0642-4f5d-9892-5a732bd2891e\" class=\"colab-df-container\">\n",
              "    <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>Id</th>\n",
              "      <th>SalePrice</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>1461</td>\n",
              "      <td>116908.794116</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>1462</td>\n",
              "      <td>163613.449249</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>1463</td>\n",
              "      <td>184095.361491</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "    <div class=\"colab-df-buttons\">\n",
              "\n",
              "  <div class=\"colab-df-container\">\n",
              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-914cca30-0642-4f5d-9892-5a732bd2891e')\"\n",
              "            title=\"Convert this dataframe to an interactive table.\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
              "  </svg>\n",
              "    </button>\n",
              "\n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    .colab-df-buttons div {\n",
              "      margin-bottom: 4px;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "    <script>\n",
              "      const buttonEl =\n",
              "        document.querySelector('#df-914cca30-0642-4f5d-9892-5a732bd2891e button.colab-df-convert');\n",
              "      buttonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "      async function convertToInteractive(key) {\n",
              "        const element = document.querySelector('#df-914cca30-0642-4f5d-9892-5a732bd2891e');\n",
              "        const dataTable =\n",
              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                    [key], {});\n",
              "        if (!dataTable) return;\n",
              "\n",
              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "          + ' to learn more about interactive tables.';\n",
              "        element.innerHTML = '';\n",
              "        dataTable['output_type'] = 'display_data';\n",
              "        await google.colab.output.renderOutput(dataTable, element);\n",
              "        const docLink = document.createElement('div');\n",
              "        docLink.innerHTML = docLinkHtml;\n",
              "        element.appendChild(docLink);\n",
              "      }\n",
              "    </script>\n",
              "  </div>\n",
              "\n",
              "\n",
              "<div id=\"df-531043d3-9f2c-4cfc-97b0-bffc1b69ba94\">\n",
              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-531043d3-9f2c-4cfc-97b0-bffc1b69ba94')\"\n",
              "            title=\"Suggest charts\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "     width=\"24px\">\n",
              "    <g>\n",
              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
              "    </g>\n",
              "</svg>\n",
              "  </button>\n",
              "\n",
              "<style>\n",
              "  .colab-df-quickchart {\n",
              "      --bg-color: #E8F0FE;\n",
              "      --fill-color: #1967D2;\n",
              "      --hover-bg-color: #E2EBFA;\n",
              "      --hover-fill-color: #174EA6;\n",
              "      --disabled-fill-color: #AAA;\n",
              "      --disabled-bg-color: #DDD;\n",
              "  }\n",
              "\n",
              "  [theme=dark] .colab-df-quickchart {\n",
              "      --bg-color: #3B4455;\n",
              "      --fill-color: #D2E3FC;\n",
              "      --hover-bg-color: #434B5C;\n",
              "      --hover-fill-color: #FFFFFF;\n",
              "      --disabled-bg-color: #3B4455;\n",
              "      --disabled-fill-color: #666;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart {\n",
              "    background-color: var(--bg-color);\n",
              "    border: none;\n",
              "    border-radius: 50%;\n",
              "    cursor: pointer;\n",
              "    display: none;\n",
              "    fill: var(--fill-color);\n",
              "    height: 32px;\n",
              "    padding: 0;\n",
              "    width: 32px;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart:hover {\n",
              "    background-color: var(--hover-bg-color);\n",
              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "    fill: var(--button-hover-fill-color);\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart-complete:disabled,\n",
              "  .colab-df-quickchart-complete:disabled:hover {\n",
              "    background-color: var(--disabled-bg-color);\n",
              "    fill: var(--disabled-fill-color);\n",
              "    box-shadow: none;\n",
              "  }\n",
              "\n",
              "  .colab-df-spinner {\n",
              "    border: 2px solid var(--fill-color);\n",
              "    border-color: transparent;\n",
              "    border-bottom-color: var(--fill-color);\n",
              "    animation:\n",
              "      spin 1s steps(1) infinite;\n",
              "  }\n",
              "\n",
              "  @keyframes spin {\n",
              "    0% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "      border-left-color: var(--fill-color);\n",
              "    }\n",
              "    20% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    30% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    40% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    60% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    80% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "    90% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "  }\n",
              "</style>\n",
              "\n",
              "  <script>\n",
              "    async function quickchart(key) {\n",
              "      const quickchartButtonEl =\n",
              "        document.querySelector('#' + key + ' button');\n",
              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
              "      try {\n",
              "        const charts = await google.colab.kernel.invokeFunction(\n",
              "            'suggestCharts', [key], {});\n",
              "      } catch (error) {\n",
              "        console.error('Error during call to suggestCharts:', error);\n",
              "      }\n",
              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
              "    }\n",
              "    (() => {\n",
              "      let quickchartButtonEl =\n",
              "        document.querySelector('#df-531043d3-9f2c-4cfc-97b0-bffc1b69ba94 button');\n",
              "      quickchartButtonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "    })();\n",
              "  </script>\n",
              "</div>\n",
              "    </div>\n",
              "  </div>\n"
            ],
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "dataframe",
              "variable_name": "final",
              "summary": "{\n  \"name\": \"final\",\n  \"rows\": 1459,\n  \"fields\": [\n    {\n      \"column\": \"Id\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 421,\n        \"min\": 1461,\n        \"max\": 2919,\n        \"num_unique_values\": 1459,\n        \"samples\": [\n          2782,\n          2297,\n          1874\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"SalePrice\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 76481.96226474205,\n        \"min\": 37583.029954204336,\n        \"max\": 525656.1493670065,\n        \"num_unique_values\": 1458,\n        \"samples\": [\n          93322.11940372009,\n          363516.775079109,\n          206466.05067474546\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"
            }
          },
          "metadata": {},
          "execution_count": 45
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "final.to_csv('final_0.csv', index=False)"
      ],
      "metadata": {
        "id": "R6O-mhDg4Er8"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "# Classification"
      ],
      "metadata": {
        "id": "NJN3_9QzBMFz"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "!pip install optuna\n",
        "!pip install catboost"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "N98BauBOBSay",
        "outputId": "cf59a174-7f19-4350-8f31-e6b3f41d8a8f"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Requirement already satisfied: optuna in /usr/local/lib/python3.11/dist-packages (4.2.1)\n",
            "Requirement already satisfied: alembic>=1.5.0 in /usr/local/lib/python3.11/dist-packages (from optuna) (1.14.1)\n",
            "Requirement already satisfied: colorlog in /usr/local/lib/python3.11/dist-packages (from optuna) (6.9.0)\n",
            "Requirement already satisfied: numpy in /usr/local/lib/python3.11/dist-packages (from optuna) (1.26.4)\n",
            "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.11/dist-packages (from optuna) (24.2)\n",
            "Requirement already satisfied: sqlalchemy>=1.4.2 in /usr/local/lib/python3.11/dist-packages (from optuna) (2.0.38)\n",
            "Requirement already satisfied: tqdm in /usr/local/lib/python3.11/dist-packages (from optuna) (4.67.1)\n",
            "Requirement already satisfied: PyYAML in /usr/local/lib/python3.11/dist-packages (from optuna) (6.0.2)\n",
            "Requirement already satisfied: Mako in /usr/local/lib/python3.11/dist-packages (from alembic>=1.5.0->optuna) (1.3.9)\n",
            "Requirement already satisfied: typing-extensions>=4 in /usr/local/lib/python3.11/dist-packages (from alembic>=1.5.0->optuna) (4.12.2)\n",
            "Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.11/dist-packages (from sqlalchemy>=1.4.2->optuna) (3.1.1)\n",
            "Requirement already satisfied: MarkupSafe>=0.9.2 in /usr/local/lib/python3.11/dist-packages (from Mako->alembic>=1.5.0->optuna) (3.0.2)\n",
            "Requirement already satisfied: catboost in /usr/local/lib/python3.11/dist-packages (1.2.7)\n",
            "Requirement already satisfied: graphviz in /usr/local/lib/python3.11/dist-packages (from catboost) (0.20.3)\n",
            "Requirement already satisfied: matplotlib in /usr/local/lib/python3.11/dist-packages (from catboost) (3.10.0)\n",
            "Requirement already satisfied: numpy<2.0,>=1.16.0 in /usr/local/lib/python3.11/dist-packages (from catboost) (1.26.4)\n",
            "Requirement already satisfied: pandas>=0.24 in /usr/local/lib/python3.11/dist-packages (from catboost) (2.2.2)\n",
            "Requirement already satisfied: scipy in /usr/local/lib/python3.11/dist-packages (from catboost) (1.13.1)\n",
            "Requirement already satisfied: plotly in /usr/local/lib/python3.11/dist-packages (from catboost) (5.24.1)\n",
            "Requirement already satisfied: six in /usr/local/lib/python3.11/dist-packages (from catboost) (1.17.0)\n",
            "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas>=0.24->catboost) (2.8.2)\n",
            "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas>=0.24->catboost) (2025.1)\n",
            "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas>=0.24->catboost) (2025.1)\n",
            "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib->catboost) (1.3.1)\n",
            "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.11/dist-packages (from matplotlib->catboost) (0.12.1)\n",
            "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.11/dist-packages (from matplotlib->catboost) (4.56.0)\n",
            "Requirement already satisfied: kiwisolver>=1.3.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib->catboost) (1.4.8)\n",
            "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.11/dist-packages (from matplotlib->catboost) (24.2)\n",
            "Requirement already satisfied: pillow>=8 in /usr/local/lib/python3.11/dist-packages (from matplotlib->catboost) (11.1.0)\n",
            "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.11/dist-packages (from matplotlib->catboost) (3.2.1)\n",
            "Requirement already satisfied: tenacity>=6.2.0 in /usr/local/lib/python3.11/dist-packages (from plotly->catboost) (9.0.0)\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.impute import SimpleImputer\n",
        "import numpy as np\n",
        "import pandas as pd\n",
        "from catboost import CatBoostClassifier, Pool\n",
        "import optuna\n",
        "from sklearn.model_selection import train_test_split\n",
        "from sklearn.metrics import accuracy_score"
      ],
      "metadata": {
        "id": "jo-YFxu7CwV5"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "train = pd.read_csv('/content/train (7).csv', index_col=False)\n",
        "train = train.drop('PassengerId', axis=1)\n",
        "test = pd.read_csv('/content/test (4).csv', index_col=False)"
      ],
      "metadata": {
        "id": "LKe8gmdxC8ZU"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "train.head(3)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 143
        },
        "id": "ZzkNv4UzLQ0B",
        "outputId": "ae3a43e1-0e84-4723-cec4-ddf24d85a9a2"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "   Survived  Pclass                                               Name  \\\n",
              "0         0       3                            Braund, Mr. Owen Harris   \n",
              "1         1       1  Cumings, Mrs. John Bradley (Florence Briggs Th...   \n",
              "2         1       3                             Heikkinen, Miss. Laina   \n",
              "\n",
              "      Sex   Age  SibSp  Parch            Ticket     Fare Cabin Embarked  \n",
              "0    male  22.0      1      0         A/5 21171   7.2500   NaN        S  \n",
              "1  female  38.0      1      0          PC 17599  71.2833   C85        C  \n",
              "2  female  26.0      0      0  STON/O2. 3101282   7.9250   NaN        S  "
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-5f7982b3-5899-446e-ad96-0e4dfaf04ec0\" class=\"colab-df-container\">\n",
              "    <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>Survived</th>\n",
              "      <th>Pclass</th>\n",
              "      <th>Name</th>\n",
              "      <th>Sex</th>\n",
              "      <th>Age</th>\n",
              "      <th>SibSp</th>\n",
              "      <th>Parch</th>\n",
              "      <th>Ticket</th>\n",
              "      <th>Fare</th>\n",
              "      <th>Cabin</th>\n",
              "      <th>Embarked</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>0</td>\n",
              "      <td>3</td>\n",
              "      <td>Braund, Mr. Owen Harris</td>\n",
              "      <td>male</td>\n",
              "      <td>22.0</td>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "      <td>A/5 21171</td>\n",
              "      <td>7.2500</td>\n",
              "      <td>NaN</td>\n",
              "      <td>S</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>1</td>\n",
              "      <td>1</td>\n",
              "      <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n",
              "      <td>female</td>\n",
              "      <td>38.0</td>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "      <td>PC 17599</td>\n",
              "      <td>71.2833</td>\n",
              "      <td>C85</td>\n",
              "      <td>C</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>1</td>\n",
              "      <td>3</td>\n",
              "      <td>Heikkinen, Miss. Laina</td>\n",
              "      <td>female</td>\n",
              "      <td>26.0</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>STON/O2. 3101282</td>\n",
              "      <td>7.9250</td>\n",
              "      <td>NaN</td>\n",
              "      <td>S</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "    <div class=\"colab-df-buttons\">\n",
              "\n",
              "  <div class=\"colab-df-container\">\n",
              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-5f7982b3-5899-446e-ad96-0e4dfaf04ec0')\"\n",
              "            title=\"Convert this dataframe to an interactive table.\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
              "  </svg>\n",
              "    </button>\n",
              "\n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    .colab-df-buttons div {\n",
              "      margin-bottom: 4px;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "    <script>\n",
              "      const buttonEl =\n",
              "        document.querySelector('#df-5f7982b3-5899-446e-ad96-0e4dfaf04ec0 button.colab-df-convert');\n",
              "      buttonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "      async function convertToInteractive(key) {\n",
              "        const element = document.querySelector('#df-5f7982b3-5899-446e-ad96-0e4dfaf04ec0');\n",
              "        const dataTable =\n",
              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                    [key], {});\n",
              "        if (!dataTable) return;\n",
              "\n",
              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "          + ' to learn more about interactive tables.';\n",
              "        element.innerHTML = '';\n",
              "        dataTable['output_type'] = 'display_data';\n",
              "        await google.colab.output.renderOutput(dataTable, element);\n",
              "        const docLink = document.createElement('div');\n",
              "        docLink.innerHTML = docLinkHtml;\n",
              "        element.appendChild(docLink);\n",
              "      }\n",
              "    </script>\n",
              "  </div>\n",
              "\n",
              "\n",
              "<div id=\"df-d76f977f-17cb-4fda-a224-3eda10673299\">\n",
              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-d76f977f-17cb-4fda-a224-3eda10673299')\"\n",
              "            title=\"Suggest charts\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "     width=\"24px\">\n",
              "    <g>\n",
              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
              "    </g>\n",
              "</svg>\n",
              "  </button>\n",
              "\n",
              "<style>\n",
              "  .colab-df-quickchart {\n",
              "      --bg-color: #E8F0FE;\n",
              "      --fill-color: #1967D2;\n",
              "      --hover-bg-color: #E2EBFA;\n",
              "      --hover-fill-color: #174EA6;\n",
              "      --disabled-fill-color: #AAA;\n",
              "      --disabled-bg-color: #DDD;\n",
              "  }\n",
              "\n",
              "  [theme=dark] .colab-df-quickchart {\n",
              "      --bg-color: #3B4455;\n",
              "      --fill-color: #D2E3FC;\n",
              "      --hover-bg-color: #434B5C;\n",
              "      --hover-fill-color: #FFFFFF;\n",
              "      --disabled-bg-color: #3B4455;\n",
              "      --disabled-fill-color: #666;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart {\n",
              "    background-color: var(--bg-color);\n",
              "    border: none;\n",
              "    border-radius: 50%;\n",
              "    cursor: pointer;\n",
              "    display: none;\n",
              "    fill: var(--fill-color);\n",
              "    height: 32px;\n",
              "    padding: 0;\n",
              "    width: 32px;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart:hover {\n",
              "    background-color: var(--hover-bg-color);\n",
              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "    fill: var(--button-hover-fill-color);\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart-complete:disabled,\n",
              "  .colab-df-quickchart-complete:disabled:hover {\n",
              "    background-color: var(--disabled-bg-color);\n",
              "    fill: var(--disabled-fill-color);\n",
              "    box-shadow: none;\n",
              "  }\n",
              "\n",
              "  .colab-df-spinner {\n",
              "    border: 2px solid var(--fill-color);\n",
              "    border-color: transparent;\n",
              "    border-bottom-color: var(--fill-color);\n",
              "    animation:\n",
              "      spin 1s steps(1) infinite;\n",
              "  }\n",
              "\n",
              "  @keyframes spin {\n",
              "    0% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "      border-left-color: var(--fill-color);\n",
              "    }\n",
              "    20% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    30% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    40% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    60% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    80% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "    90% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "  }\n",
              "</style>\n",
              "\n",
              "  <script>\n",
              "    async function quickchart(key) {\n",
              "      const quickchartButtonEl =\n",
              "        document.querySelector('#' + key + ' button');\n",
              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
              "      try {\n",
              "        const charts = await google.colab.kernel.invokeFunction(\n",
              "            'suggestCharts', [key], {});\n",
              "      } catch (error) {\n",
              "        console.error('Error during call to suggestCharts:', error);\n",
              "      }\n",
              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
              "    }\n",
              "    (() => {\n",
              "      let quickchartButtonEl =\n",
              "        document.querySelector('#df-d76f977f-17cb-4fda-a224-3eda10673299 button');\n",
              "      quickchartButtonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "    })();\n",
              "  </script>\n",
              "</div>\n",
              "    </div>\n",
              "  </div>\n"
            ],
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "dataframe",
              "variable_name": "train",
              "summary": "{\n  \"name\": \"train\",\n  \"rows\": 891,\n  \"fields\": [\n    {\n      \"column\": \"Survived\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0,\n        \"min\": 0,\n        \"max\": 1,\n        \"num_unique_values\": 2,\n        \"samples\": [\n          1,\n          0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Pclass\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0,\n        \"min\": 1,\n        \"max\": 3,\n        \"num_unique_values\": 3,\n        \"samples\": [\n          3,\n          1\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Name\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 891,\n        \"samples\": [\n          \"Moubarek, Master. Halim Gonios (\\\"William George\\\")\",\n          \"Kvillner, Mr. Johan Henrik Johannesson\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Sex\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 2,\n        \"samples\": [\n          \"female\",\n          \"male\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Age\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 14.526497332334044,\n        \"min\": 0.42,\n        \"max\": 80.0,\n        \"num_unique_values\": 88,\n        \"samples\": [\n          0.75,\n          22.0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"SibSp\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 1,\n        \"min\": 0,\n        \"max\": 8,\n        \"num_unique_values\": 7,\n        \"samples\": [\n          1,\n          0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Parch\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0,\n        \"min\": 0,\n        \"max\": 6,\n        \"num_unique_values\": 7,\n        \"samples\": [\n          0,\n          1\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Ticket\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 681,\n        \"samples\": [\n          \"11774\",\n          \"248740\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Fare\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 49.693428597180905,\n        \"min\": 0.0,\n        \"max\": 512.3292,\n        \"num_unique_values\": 248,\n        \"samples\": [\n          11.2417,\n          51.8625\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Cabin\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 147,\n        \"samples\": [\n          \"D45\",\n          \"B49\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Embarked\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 3,\n        \"samples\": [\n          \"S\",\n          \"C\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"
            }
          },
          "metadata": {},
          "execution_count": 69
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "missing_values_table = train.isna().sum().reset_index()\n",
        "missing_values_table.columns = ['Column', 'Missing Values']\n",
        "missing_values_table['% of Total Values'] = (missing_values_table['Missing Values'] / len(train)) * 100\n",
        "\n",
        "print(missing_values_table['% of Total Values'])"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "4P7skW49DMAD",
        "outputId": "3fd75205-5e77-4d06-d6a2-8046c51549a6"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "0      0.000000\n",
            "1      0.000000\n",
            "2      0.000000\n",
            "3      0.000000\n",
            "4     19.865320\n",
            "5      0.000000\n",
            "6      0.000000\n",
            "7      0.000000\n",
            "8      0.000000\n",
            "9     77.104377\n",
            "10     0.224467\n",
            "Name: % of Total Values, dtype: float64\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "columns = list(train.columns)\n",
        "categorical_features = []\n",
        "for i in columns:\n",
        "    if str(type(train[i].iloc[0])) == \"<class 'str'>\":\n",
        "        categorical_features.append(i)"
      ],
      "metadata": {
        "id": "Q0MJb-FnGXTT"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "a = list(missing_values_table['% of Total Values'])\n",
        "num = []\n",
        "cat = []\n",
        "drp = []\n",
        "for i in range(len(a)):\n",
        "    if missing_values_table['% of Total Values'].iloc[i] > 0:\n",
        "        if missing_values_table['% of Total Values'].iloc[i] > 40:\n",
        "            drp.append(str(missing_values_table['Column'].iloc[i]))\n",
        "        elif str(missing_values_table['Column'].iloc[i]) in categorical_features:\n",
        "            print(missing_values_table['Column'].iloc[i], missing_values_table['% of Total Values'].iloc[i], \"CATEGORICAL\")\n",
        "            cat.append(str(missing_values_table['Column'].iloc[i]))\n",
        "        else:\n",
        "            print(missing_values_table['Column'].iloc[i], missing_values_table['% of Total Values'].iloc[i])\n",
        "            num.append(str(missing_values_table['Column'].iloc[i]))\n",
        "\n",
        "print(drp)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "CO6Z0u4TDTFx",
        "outputId": "74fa99ed-db10-4781-9520-caba786bfa2e"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Age 19.865319865319865\n",
            "Embarked 0.22446689113355783 CATEGORICAL\n",
            "['Cabin']\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "train = train.drop(drp, axis = 1)\n",
        "\n",
        "imputer = SimpleImputer(strategy='mean')\n",
        "train[num] = imputer.fit_transform(train[num])\n",
        "\n",
        "imputer_cat = SimpleImputer(strategy='most_frequent')\n",
        "train[cat] = imputer_cat.fit_transform(train[cat])"
      ],
      "metadata": {
        "id": "9Z_pmurrF3NP"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "y = train['Survived']\n",
        "x = train.drop('Survived', axis =1)\n",
        "\n",
        "train_x, val_x, train_y, val_y = train_test_split(x, y, test_size=0.2)"
      ],
      "metadata": {
        "id": "wxvdyGaYLD6d"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "columns = list(train_x.columns)\n",
        "categorical_features = []\n",
        "for i in columns:\n",
        "    if str(type(train[i].iloc[0])) == \"<class 'str'>\":\n",
        "        categorical_features.append(i)"
      ],
      "metadata": {
        "id": "_a5vRqgOMK_n"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "def objective(trial):\n",
        "    params = {\n",
        "        \"iterations\": trial.suggest_int(\"iterations\", 500, 2000),\n",
        "        \"depth\": trial.suggest_int(\"depth\", 4, 7),\n",
        "        \"learning_rate\": trial.suggest_float(\"learning_rate\", 0.01, 0.3, log=True),\n",
        "        \"l2_leaf_reg\": trial.suggest_float(\"l2_leaf_reg\", 1, 5, log=True),\n",
        "        \"loss_function\": \"Logloss\",\n",
        "        \"eval_metric\": \"Accuracy\",\n",
        "        \"random_seed\": 42,\n",
        "        \"verbose\": 0\n",
        "    }\n",
        "\n",
        "    model = CatBoostClassifier(**params)\n",
        "    model.fit(train_x, train_y, eval_set=(val_x, val_y), verbose=1000, cat_features=categorical_features)\n",
        "\n",
        "    preds = model.predict(val_x)\n",
        "    accuracy = accuracy_score(val_y, preds)\n",
        "    return accuracy\n",
        "\n",
        "# 📌 5. Оптимизация гиперпараметров\n",
        "study = optuna.create_study(direction=\"maximize\")  # Для классификации — максимизируем accuracy\n",
        "study.optimize(objective, n_trials=20)  # Количество итераций\n",
        "\n",
        "# 📌 6. Обучение модели с лучшими параметрами\n",
        "best_params = study.best_params\n",
        "best_model = CatBoostClassifier(**best_params)\n",
        "best_model.fit(train_x, train_y, eval_set=(val_x, val_y), verbose=100, cat_features=categorical_features)\n",
        "\n",
        "# 📌 7. Оценка модели\n",
        "final_preds = best_model.predict(val_x)\n",
        "accuracy = accuracy_score(val_y, final_preds)\n",
        "print(f\"Final Accuracy: {accuracy:.4f}\")\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "FXMxqKChGOlq",
        "outputId": "1f6b7a2a-4bf8-440b-9546-2074e282d21b"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-22 15:05:28,374] A new study created in memory with name: no-name-5696c2d7-b3d1-4ecd-9c50-1c3b744c4f02\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "0:\tlearn: 0.8146067\ttest: 0.7988827\tbest: 0.7988827 (0)\ttotal: 2.92ms\tremaining: 3.15s\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-22 15:05:30,557] Trial 0 finished with value: 0.8491620111731844 and parameters: {'iterations': 1078, 'depth': 5, 'learning_rate': 0.10281491066829387, 'l2_leaf_reg': 2.079064968064275}. Best is trial 0 with value: 0.8491620111731844.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "1000:\tlearn: 0.9915730\ttest: 0.8212291\tbest: 0.8491620 (110)\ttotal: 1.92s\tremaining: 147ms\n",
            "1077:\tlearn: 0.9929775\ttest: 0.8268156\tbest: 0.8491620 (110)\ttotal: 2.06s\tremaining: 0us\n",
            "\n",
            "bestTest = 0.8491620112\n",
            "bestIteration = 110\n",
            "\n",
            "Shrink model to first 111 iterations.\n",
            "0:\tlearn: 0.8188202\ttest: 0.7988827\tbest: 0.7988827 (0)\ttotal: 4.02ms\tremaining: 5.7s\n",
            "1000:\tlearn: 0.9676966\ttest: 0.8268156\tbest: 0.8435754 (364)\ttotal: 2.31s\tremaining: 963ms\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-22 15:05:34,005] Trial 1 finished with value: 0.8435754189944135 and parameters: {'iterations': 1418, 'depth': 6, 'learning_rate': 0.043508919824835304, 'l2_leaf_reg': 3.6659717941077608}. Best is trial 0 with value: 0.8491620111731844.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "1417:\tlearn: 0.9761236\ttest: 0.8268156\tbest: 0.8435754 (364)\ttotal: 3.29s\tremaining: 0us\n",
            "\n",
            "bestTest = 0.843575419\n",
            "bestIteration = 364\n",
            "\n",
            "Shrink model to first 365 iterations.\n",
            "0:\tlearn: 0.8188202\ttest: 0.7988827\tbest: 0.7988827 (0)\ttotal: 3.05ms\tremaining: 1.61s\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-22 15:05:35,126] Trial 2 finished with value: 0.8379888268156425 and parameters: {'iterations': 529, 'depth': 6, 'learning_rate': 0.01588182180207621, 'l2_leaf_reg': 1.6640193240287826}. Best is trial 0 with value: 0.8491620111731844.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "528:\tlearn: 0.8918539\ttest: 0.8212291\tbest: 0.8379888 (183)\ttotal: 1.04s\tremaining: 0us\n",
            "\n",
            "bestTest = 0.8379888268\n",
            "bestIteration = 183\n",
            "\n",
            "Shrink model to first 184 iterations.\n",
            "0:\tlearn: 0.8188202\ttest: 0.7988827\tbest: 0.7988827 (0)\ttotal: 2.86ms\tremaining: 3.12s\n",
            "1000:\tlearn: 0.9845506\ttest: 0.8044693\tbest: 0.8435754 (220)\ttotal: 2.29s\tremaining: 204ms\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-22 15:05:37,940] Trial 3 finished with value: 0.8435754189944135 and parameters: {'iterations': 1090, 'depth': 6, 'learning_rate': 0.07543482284922365, 'l2_leaf_reg': 4.14476144682591}. Best is trial 0 with value: 0.8491620111731844.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "1089:\tlearn: 0.9873596\ttest: 0.7988827\tbest: 0.8435754 (220)\ttotal: 2.68s\tremaining: 0us\n",
            "\n",
            "bestTest = 0.843575419\n",
            "bestIteration = 220\n",
            "\n",
            "Shrink model to first 221 iterations.\n",
            "0:\tlearn: 0.8188202\ttest: 0.7988827\tbest: 0.7988827 (0)\ttotal: 5.43ms\tremaining: 5.62s\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-22 15:05:41,497] Trial 4 finished with value: 0.8435754189944135 and parameters: {'iterations': 1035, 'depth': 7, 'learning_rate': 0.2571458943835452, 'l2_leaf_reg': 3.8609229716750417}. Best is trial 0 with value: 0.8491620111731844.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "1000:\tlearn: 1.0000000\ttest: 0.7877095\tbest: 0.8435754 (73)\ttotal: 3.33s\tremaining: 113ms\n",
            "1034:\tlearn: 1.0000000\ttest: 0.7877095\tbest: 0.8435754 (73)\ttotal: 3.42s\tremaining: 0us\n",
            "\n",
            "bestTest = 0.843575419\n",
            "bestIteration = 73\n",
            "\n",
            "Shrink model to first 74 iterations.\n",
            "0:\tlearn: 0.8075843\ttest: 0.7988827\tbest: 0.7988827 (0)\ttotal: 1.89ms\tremaining: 2.21s\n",
            "1000:\tlearn: 0.9016854\ttest: 0.8156425\tbest: 0.8268156 (180)\ttotal: 1.56s\tremaining: 260ms\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-22 15:05:43,441] Trial 5 finished with value: 0.8268156424581006 and parameters: {'iterations': 1168, 'depth': 4, 'learning_rate': 0.01997922756376188, 'l2_leaf_reg': 4.656972624040588}. Best is trial 0 with value: 0.8491620111731844.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "1167:\tlearn: 0.9073034\ttest: 0.8156425\tbest: 0.8268156 (180)\ttotal: 1.82s\tremaining: 0us\n",
            "\n",
            "bestTest = 0.8268156425\n",
            "bestIteration = 180\n",
            "\n",
            "Shrink model to first 181 iterations.\n",
            "0:\tlearn: 0.8146067\ttest: 0.7988827\tbest: 0.7988827 (0)\ttotal: 2.57ms\tremaining: 1.48s\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-22 15:05:44,662] Trial 6 finished with value: 0.8435754189944135 and parameters: {'iterations': 574, 'depth': 5, 'learning_rate': 0.08834109776170529, 'l2_leaf_reg': 1.004762461065921}. Best is trial 0 with value: 0.8491620111731844.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "573:\tlearn: 0.9803371\ttest: 0.8156425\tbest: 0.8435754 (196)\ttotal: 1.14s\tremaining: 0us\n",
            "\n",
            "bestTest = 0.843575419\n",
            "bestIteration = 196\n",
            "\n",
            "Shrink model to first 197 iterations.\n",
            "0:\tlearn: 0.8146067\ttest: 0.7988827\tbest: 0.7988827 (0)\ttotal: 2.62ms\tremaining: 2.25s\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-22 15:05:46,428] Trial 7 finished with value: 0.8435754189944135 and parameters: {'iterations': 859, 'depth': 5, 'learning_rate': 0.05228624368798647, 'l2_leaf_reg': 2.2087846373564632}. Best is trial 0 with value: 0.8491620111731844.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "858:\tlearn: 0.9620787\ttest: 0.8212291\tbest: 0.8435754 (269)\ttotal: 1.66s\tremaining: 0us\n",
            "\n",
            "bestTest = 0.843575419\n",
            "bestIteration = 269\n",
            "\n",
            "Shrink model to first 270 iterations.\n",
            "0:\tlearn: 0.8188202\ttest: 0.7988827\tbest: 0.7988827 (0)\ttotal: 2.57ms\tremaining: 2.59s\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-22 15:05:48,963] Trial 8 finished with value: 0.8379888268156425 and parameters: {'iterations': 1008, 'depth': 6, 'learning_rate': 0.23851821727115008, 'l2_leaf_reg': 1.9779337548268094}. Best is trial 0 with value: 0.8491620111731844.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "1000:\tlearn: 1.0000000\ttest: 0.8044693\tbest: 0.8379888 (13)\ttotal: 2.39s\tremaining: 16.7ms\n",
            "1007:\tlearn: 1.0000000\ttest: 0.8044693\tbest: 0.8379888 (13)\ttotal: 2.41s\tremaining: 0us\n",
            "\n",
            "bestTest = 0.8379888268\n",
            "bestIteration = 13\n",
            "\n",
            "Shrink model to first 14 iterations.\n",
            "0:\tlearn: 0.8188202\ttest: 0.7988827\tbest: 0.7988827 (0)\ttotal: 4.03ms\tremaining: 6.37s\n",
            "1000:\tlearn: 0.9957865\ttest: 0.7988827\tbest: 0.8435754 (180)\ttotal: 2.79s\tremaining: 1.61s\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-22 15:05:53,178] Trial 9 finished with value: 0.8435754189944135 and parameters: {'iterations': 1581, 'depth': 6, 'learning_rate': 0.1067027642243077, 'l2_leaf_reg': 1.5977216451114515}. Best is trial 0 with value: 0.8491620111731844.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "1580:\tlearn: 0.9985955\ttest: 0.7988827\tbest: 0.8435754 (180)\ttotal: 4.05s\tremaining: 0us\n",
            "\n",
            "bestTest = 0.843575419\n",
            "bestIteration = 180\n",
            "\n",
            "Shrink model to first 181 iterations.\n",
            "0:\tlearn: 0.8075843\ttest: 0.7988827\tbest: 0.7988827 (0)\ttotal: 2.32ms\tremaining: 4.15s\n",
            "1000:\tlearn: 0.9859551\ttest: 0.8100559\tbest: 0.8379888 (70)\ttotal: 1.69s\tremaining: 1.33s\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-22 15:05:56,240] Trial 10 finished with value: 0.8379888268156425 and parameters: {'iterations': 1790, 'depth': 4, 'learning_rate': 0.14047712034504312, 'l2_leaf_reg': 2.8601732035579426}. Best is trial 0 with value: 0.8491620111731844.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "1789:\tlearn: 0.9943820\ttest: 0.8044693\tbest: 0.8379888 (70)\ttotal: 2.88s\tremaining: 0us\n",
            "\n",
            "bestTest = 0.8379888268\n",
            "bestIteration = 70\n",
            "\n",
            "Shrink model to first 71 iterations.\n",
            "0:\tlearn: 0.8202247\ttest: 0.8044693\tbest: 0.8044693 (0)\ttotal: 3.74ms\tremaining: 5.42s\n",
            "1000:\tlearn: 0.9662921\ttest: 0.8212291\tbest: 0.8324022 (93)\ttotal: 2.87s\tremaining: 1.29s\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-22 15:06:01,278] Trial 11 finished with value: 0.8324022346368715 and parameters: {'iterations': 1450, 'depth': 7, 'learning_rate': 0.028888991528930555, 'l2_leaf_reg': 3.0539039026362467}. Best is trial 0 with value: 0.8491620111731844.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "1449:\tlearn: 0.9775281\ttest: 0.8044693\tbest: 0.8324022 (93)\ttotal: 4.86s\tremaining: 0us\n",
            "\n",
            "bestTest = 0.8324022346\n",
            "bestIteration = 93\n",
            "\n",
            "Shrink model to first 94 iterations.\n",
            "0:\tlearn: 0.8146067\ttest: 0.7988827\tbest: 0.7988827 (0)\ttotal: 2.62ms\tremaining: 3.66s\n",
            "1000:\tlearn: 0.9634831\ttest: 0.8324022\tbest: 0.8435754 (789)\ttotal: 1.88s\tremaining: 752ms\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-22 15:06:04,107] Trial 12 finished with value: 0.8435754189944135 and parameters: {'iterations': 1401, 'depth': 5, 'learning_rate': 0.04369338314178926, 'l2_leaf_reg': 2.9064845966321844}. Best is trial 0 with value: 0.8491620111731844.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "1400:\tlearn: 0.9733146\ttest: 0.8212291\tbest: 0.8435754 (789)\ttotal: 2.67s\tremaining: 0us\n",
            "\n",
            "bestTest = 0.843575419\n",
            "bestIteration = 789\n",
            "\n",
            "Shrink model to first 790 iterations.\n",
            "0:\tlearn: 0.8146067\ttest: 0.7988827\tbest: 0.7988827 (0)\ttotal: 2.54ms\tremaining: 5.06s\n",
            "1000:\tlearn: 0.9620787\ttest: 0.8324022\tbest: 0.8379888 (559)\ttotal: 1.91s\tremaining: 1.9s\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-22 15:06:08,112] Trial 13 finished with value: 0.8379888268156425 and parameters: {'iterations': 1997, 'depth': 5, 'learning_rate': 0.03491901521515997, 'l2_leaf_reg': 1.341232525010233}. Best is trial 0 with value: 0.8491620111731844.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "1996:\tlearn: 0.9887640\ttest: 0.8156425\tbest: 0.8379888 (559)\ttotal: 3.81s\tremaining: 0us\n",
            "\n",
            "bestTest = 0.8379888268\n",
            "bestIteration = 559\n",
            "\n",
            "Shrink model to first 560 iterations.\n",
            "0:\tlearn: 0.8202247\ttest: 0.8044693\tbest: 0.8044693 (0)\ttotal: 4.51ms\tremaining: 6.08s\n",
            "1000:\tlearn: 0.9157303\ttest: 0.8379888\tbest: 0.8379888 (911)\ttotal: 2.5s\tremaining: 868ms\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-22 15:06:12,325] Trial 14 finished with value: 0.8435754189944135 and parameters: {'iterations': 1348, 'depth': 7, 'learning_rate': 0.011244048959052837, 'l2_leaf_reg': 3.417468037759992}. Best is trial 0 with value: 0.8491620111731844.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "1347:\tlearn: 0.9367978\ttest: 0.8379888\tbest: 0.8435754 (1013)\ttotal: 4.01s\tremaining: 0us\n",
            "\n",
            "bestTest = 0.843575419\n",
            "bestIteration = 1013\n",
            "\n",
            "Shrink model to first 1014 iterations.\n",
            "0:\tlearn: 0.8075843\ttest: 0.7988827\tbest: 0.7988827 (0)\ttotal: 2.96ms\tremaining: 2.35s\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-22 15:06:13,805] Trial 15 finished with value: 0.8379888268156425 and parameters: {'iterations': 794, 'depth': 4, 'learning_rate': 0.17644799666045521, 'l2_leaf_reg': 2.4992825317390106}. Best is trial 0 with value: 0.8491620111731844.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "793:\tlearn: 0.9887640\ttest: 0.8156425\tbest: 0.8379888 (21)\ttotal: 1.36s\tremaining: 0us\n",
            "\n",
            "bestTest = 0.8379888268\n",
            "bestIteration = 21\n",
            "\n",
            "Shrink model to first 22 iterations.\n",
            "0:\tlearn: 0.8146067\ttest: 0.7988827\tbest: 0.7988827 (0)\ttotal: 2.62ms\tremaining: 4.26s\n",
            "1000:\tlearn: 0.9845506\ttest: 0.8044693\tbest: 0.8379888 (419)\ttotal: 1.99s\tremaining: 1.25s\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-22 15:06:17,181] Trial 16 finished with value: 0.8379888268156425 and parameters: {'iterations': 1630, 'depth': 5, 'learning_rate': 0.07402923733761196, 'l2_leaf_reg': 1.801948907451662}. Best is trial 0 with value: 0.8491620111731844.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "1629:\tlearn: 0.9943820\ttest: 0.7988827\tbest: 0.8379888 (419)\ttotal: 3.2s\tremaining: 0us\n",
            "\n",
            "bestTest = 0.8379888268\n",
            "bestIteration = 419\n",
            "\n",
            "Shrink model to first 420 iterations.\n",
            "0:\tlearn: 0.8188202\ttest: 0.7988827\tbest: 0.7988827 (0)\ttotal: 3.28ms\tremaining: 4.12s\n",
            "1000:\tlearn: 0.9648876\ttest: 0.8324022\tbest: 0.8435754 (614)\ttotal: 2.29s\tremaining: 588ms\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-22 15:06:20,264] Trial 17 finished with value: 0.8435754189944135 and parameters: {'iterations': 1258, 'depth': 6, 'learning_rate': 0.025923545234300217, 'l2_leaf_reg': 1.3320547003575962}. Best is trial 0 with value: 0.8491620111731844.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "1257:\tlearn: 0.9705056\ttest: 0.8324022\tbest: 0.8435754 (614)\ttotal: 2.92s\tremaining: 0us\n",
            "\n",
            "bestTest = 0.843575419\n",
            "bestIteration = 614\n",
            "\n",
            "Shrink model to first 615 iterations.\n",
            "0:\tlearn: 0.8188202\ttest: 0.7988827\tbest: 0.7988827 (0)\ttotal: 3.62ms\tremaining: 3.28s\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-22 15:06:22,587] Trial 18 finished with value: 0.8379888268156425 and parameters: {'iterations': 907, 'depth': 6, 'learning_rate': 0.12819035429978284, 'l2_leaf_reg': 4.983150443455822}. Best is trial 0 with value: 0.8491620111731844.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "906:\tlearn: 0.9887640\ttest: 0.7988827\tbest: 0.8379888 (129)\ttotal: 2.19s\tremaining: 0us\n",
            "\n",
            "bestTest = 0.8379888268\n",
            "bestIteration = 129\n",
            "\n",
            "Shrink model to first 130 iterations.\n",
            "0:\tlearn: 0.8146067\ttest: 0.7988827\tbest: 0.7988827 (0)\ttotal: 2.39ms\tremaining: 1.74s\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-22 15:06:24,680] Trial 19 finished with value: 0.8435754189944135 and parameters: {'iterations': 728, 'depth': 5, 'learning_rate': 0.06324796994402315, 'l2_leaf_reg': 2.469288457865203}. Best is trial 0 with value: 0.8491620111731844.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "727:\tlearn: 0.9691011\ttest: 0.8268156\tbest: 0.8435754 (408)\ttotal: 1.96s\tremaining: 0us\n",
            "\n",
            "bestTest = 0.843575419\n",
            "bestIteration = 408\n",
            "\n",
            "Shrink model to first 409 iterations.\n",
            "0:\tlearn: 0.6306689\ttest: 0.6331918\tbest: 0.6331918 (0)\ttotal: 2.52ms\tremaining: 2.72s\n",
            "100:\tlearn: 0.2803246\ttest: 0.4433972\tbest: 0.4266044 (31)\ttotal: 200ms\tremaining: 1.93s\n",
            "200:\tlearn: 0.2079957\ttest: 0.4501661\tbest: 0.4266044 (31)\ttotal: 393ms\tremaining: 1.71s\n",
            "300:\tlearn: 0.1601585\ttest: 0.4684582\tbest: 0.4266044 (31)\ttotal: 587ms\tremaining: 1.51s\n",
            "400:\tlearn: 0.1227384\ttest: 0.4862125\tbest: 0.4266044 (31)\ttotal: 799ms\tremaining: 1.35s\n",
            "500:\tlearn: 0.1003636\ttest: 0.5040966\tbest: 0.4266044 (31)\ttotal: 999ms\tremaining: 1.15s\n",
            "600:\tlearn: 0.0832366\ttest: 0.5282830\tbest: 0.4266044 (31)\ttotal: 1.19s\tremaining: 942ms\n",
            "700:\tlearn: 0.0718905\ttest: 0.5492214\tbest: 0.4266044 (31)\ttotal: 1.37s\tremaining: 738ms\n",
            "800:\tlearn: 0.0637023\ttest: 0.5543152\tbest: 0.4266044 (31)\ttotal: 1.56s\tremaining: 540ms\n",
            "900:\tlearn: 0.0556375\ttest: 0.5667068\tbest: 0.4266044 (31)\ttotal: 1.77s\tremaining: 347ms\n",
            "1000:\tlearn: 0.0478476\ttest: 0.5991677\tbest: 0.4266044 (31)\ttotal: 1.96s\tremaining: 150ms\n",
            "1077:\tlearn: 0.0433863\ttest: 0.6106439\tbest: 0.4266044 (31)\ttotal: 2.1s\tremaining: 0us\n",
            "\n",
            "bestTest = 0.4266044357\n",
            "bestIteration = 31\n",
            "\n",
            "Shrink model to first 32 iterations.\n",
            "Final Accuracy: 0.8156\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "print(best_params)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "UQJrShMqMTG4",
        "outputId": "5a8f0ccf-6eae-4997-cbf7-c5137903f4fe"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "{'iterations': 1078, 'depth': 5, 'learning_rate': 0.10281491066829387, 'l2_leaf_reg': 2.079064968064275}\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "test.head(3)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 143
        },
        "id": "qhjxdAn3NT0O",
        "outputId": "2de94b17-f37a-4fc3-80a9-6e980fc72c73"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "   PassengerId  Pclass                              Name     Sex   Age  SibSp  \\\n",
              "0          892       3                  Kelly, Mr. James    male  34.5      0   \n",
              "1          893       3  Wilkes, Mrs. James (Ellen Needs)  female  47.0      1   \n",
              "2          894       2         Myles, Mr. Thomas Francis    male  62.0      0   \n",
              "\n",
              "   Parch  Ticket    Fare Cabin Embarked  \n",
              "0      0  330911  7.8292   NaN        Q  \n",
              "1      0  363272  7.0000   NaN        S  \n",
              "2      0  240276  9.6875   NaN        Q  "
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-fb9844d5-84fc-4243-957d-a03dddcf4fc9\" class=\"colab-df-container\">\n",
              "    <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>PassengerId</th>\n",
              "      <th>Pclass</th>\n",
              "      <th>Name</th>\n",
              "      <th>Sex</th>\n",
              "      <th>Age</th>\n",
              "      <th>SibSp</th>\n",
              "      <th>Parch</th>\n",
              "      <th>Ticket</th>\n",
              "      <th>Fare</th>\n",
              "      <th>Cabin</th>\n",
              "      <th>Embarked</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>892</td>\n",
              "      <td>3</td>\n",
              "      <td>Kelly, Mr. James</td>\n",
              "      <td>male</td>\n",
              "      <td>34.5</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>330911</td>\n",
              "      <td>7.8292</td>\n",
              "      <td>NaN</td>\n",
              "      <td>Q</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>893</td>\n",
              "      <td>3</td>\n",
              "      <td>Wilkes, Mrs. James (Ellen Needs)</td>\n",
              "      <td>female</td>\n",
              "      <td>47.0</td>\n",
              "      <td>1</td>\n",
              "      <td>0</td>\n",
              "      <td>363272</td>\n",
              "      <td>7.0000</td>\n",
              "      <td>NaN</td>\n",
              "      <td>S</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>894</td>\n",
              "      <td>2</td>\n",
              "      <td>Myles, Mr. Thomas Francis</td>\n",
              "      <td>male</td>\n",
              "      <td>62.0</td>\n",
              "      <td>0</td>\n",
              "      <td>0</td>\n",
              "      <td>240276</td>\n",
              "      <td>9.6875</td>\n",
              "      <td>NaN</td>\n",
              "      <td>Q</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "    <div class=\"colab-df-buttons\">\n",
              "\n",
              "  <div class=\"colab-df-container\">\n",
              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-fb9844d5-84fc-4243-957d-a03dddcf4fc9')\"\n",
              "            title=\"Convert this dataframe to an interactive table.\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
              "  </svg>\n",
              "    </button>\n",
              "\n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    .colab-df-buttons div {\n",
              "      margin-bottom: 4px;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "    <script>\n",
              "      const buttonEl =\n",
              "        document.querySelector('#df-fb9844d5-84fc-4243-957d-a03dddcf4fc9 button.colab-df-convert');\n",
              "      buttonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "      async function convertToInteractive(key) {\n",
              "        const element = document.querySelector('#df-fb9844d5-84fc-4243-957d-a03dddcf4fc9');\n",
              "        const dataTable =\n",
              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                    [key], {});\n",
              "        if (!dataTable) return;\n",
              "\n",
              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "          + ' to learn more about interactive tables.';\n",
              "        element.innerHTML = '';\n",
              "        dataTable['output_type'] = 'display_data';\n",
              "        await google.colab.output.renderOutput(dataTable, element);\n",
              "        const docLink = document.createElement('div');\n",
              "        docLink.innerHTML = docLinkHtml;\n",
              "        element.appendChild(docLink);\n",
              "      }\n",
              "    </script>\n",
              "  </div>\n",
              "\n",
              "\n",
              "<div id=\"df-e7a93a95-3085-47fb-9f96-6076087f0b4b\">\n",
              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-e7a93a95-3085-47fb-9f96-6076087f0b4b')\"\n",
              "            title=\"Suggest charts\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "     width=\"24px\">\n",
              "    <g>\n",
              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
              "    </g>\n",
              "</svg>\n",
              "  </button>\n",
              "\n",
              "<style>\n",
              "  .colab-df-quickchart {\n",
              "      --bg-color: #E8F0FE;\n",
              "      --fill-color: #1967D2;\n",
              "      --hover-bg-color: #E2EBFA;\n",
              "      --hover-fill-color: #174EA6;\n",
              "      --disabled-fill-color: #AAA;\n",
              "      --disabled-bg-color: #DDD;\n",
              "  }\n",
              "\n",
              "  [theme=dark] .colab-df-quickchart {\n",
              "      --bg-color: #3B4455;\n",
              "      --fill-color: #D2E3FC;\n",
              "      --hover-bg-color: #434B5C;\n",
              "      --hover-fill-color: #FFFFFF;\n",
              "      --disabled-bg-color: #3B4455;\n",
              "      --disabled-fill-color: #666;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart {\n",
              "    background-color: var(--bg-color);\n",
              "    border: none;\n",
              "    border-radius: 50%;\n",
              "    cursor: pointer;\n",
              "    display: none;\n",
              "    fill: var(--fill-color);\n",
              "    height: 32px;\n",
              "    padding: 0;\n",
              "    width: 32px;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart:hover {\n",
              "    background-color: var(--hover-bg-color);\n",
              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "    fill: var(--button-hover-fill-color);\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart-complete:disabled,\n",
              "  .colab-df-quickchart-complete:disabled:hover {\n",
              "    background-color: var(--disabled-bg-color);\n",
              "    fill: var(--disabled-fill-color);\n",
              "    box-shadow: none;\n",
              "  }\n",
              "\n",
              "  .colab-df-spinner {\n",
              "    border: 2px solid var(--fill-color);\n",
              "    border-color: transparent;\n",
              "    border-bottom-color: var(--fill-color);\n",
              "    animation:\n",
              "      spin 1s steps(1) infinite;\n",
              "  }\n",
              "\n",
              "  @keyframes spin {\n",
              "    0% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "      border-left-color: var(--fill-color);\n",
              "    }\n",
              "    20% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    30% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    40% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    60% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    80% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "    90% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "  }\n",
              "</style>\n",
              "\n",
              "  <script>\n",
              "    async function quickchart(key) {\n",
              "      const quickchartButtonEl =\n",
              "        document.querySelector('#' + key + ' button');\n",
              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
              "      try {\n",
              "        const charts = await google.colab.kernel.invokeFunction(\n",
              "            'suggestCharts', [key], {});\n",
              "      } catch (error) {\n",
              "        console.error('Error during call to suggestCharts:', error);\n",
              "      }\n",
              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
              "    }\n",
              "    (() => {\n",
              "      let quickchartButtonEl =\n",
              "        document.querySelector('#df-e7a93a95-3085-47fb-9f96-6076087f0b4b button');\n",
              "      quickchartButtonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "    })();\n",
              "  </script>\n",
              "</div>\n",
              "    </div>\n",
              "  </div>\n"
            ],
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "dataframe",
              "variable_name": "test",
              "summary": "{\n  \"name\": \"test\",\n  \"rows\": 418,\n  \"fields\": [\n    {\n      \"column\": \"PassengerId\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 120,\n        \"min\": 892,\n        \"max\": 1309,\n        \"num_unique_values\": 418,\n        \"samples\": [\n          1213,\n          1216,\n          1280\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Pclass\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0,\n        \"min\": 1,\n        \"max\": 3,\n        \"num_unique_values\": 3,\n        \"samples\": [\n          3,\n          2,\n          1\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Name\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 418,\n        \"samples\": [\n          \"Krekorian, Mr. Neshan\",\n          \"Kreuchen, Miss. Emilie\",\n          \"Canavan, Mr. Patrick\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Sex\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 2,\n        \"samples\": [\n          \"female\",\n          \"male\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Age\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 14.18120923562442,\n        \"min\": 0.17,\n        \"max\": 76.0,\n        \"num_unique_values\": 79,\n        \"samples\": [\n          10.0,\n          34.5\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"SibSp\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0,\n        \"min\": 0,\n        \"max\": 8,\n        \"num_unique_values\": 7,\n        \"samples\": [\n          0,\n          1\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Parch\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0,\n        \"min\": 0,\n        \"max\": 9,\n        \"num_unique_values\": 8,\n        \"samples\": [\n          1,\n          6\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Ticket\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 363,\n        \"samples\": [\n          \"2673\",\n          \"W./C. 6607\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Fare\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 55.90757617997383,\n        \"min\": 0.0,\n        \"max\": 512.3292,\n        \"num_unique_values\": 169,\n        \"samples\": [\n          41.5792,\n          57.75\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Cabin\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 76,\n        \"samples\": [\n          \"A21\",\n          \"E45\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Embarked\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 3,\n        \"samples\": [\n          \"Q\",\n          \"S\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"
            }
          },
          "metadata": {},
          "execution_count": 81
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "a = list(test.columns)\n",
        "num = []\n",
        "cat = []\n",
        "for i in range(len(a)):\n",
        "    if missing_values_table['% of Total Values'].iloc[i] > 0:\n",
        "        if missing_values_table['% of Total Values'].iloc[i] > 40:\n",
        "            continue\n",
        "        elif str(missing_values_table['Column'].iloc[i]) in categorical_features:\n",
        "            print(missing_values_table['Column'].iloc[i], missing_values_table['% of Total Values'].iloc[i], \"CATEGORICAL\")\n",
        "            cat.append(str(missing_values_table['Column'].iloc[i]))\n",
        "        else:\n",
        "            print(missing_values_table['Column'].iloc[i], missing_values_table['% of Total Values'].iloc[i])\n",
        "            num.append(str(missing_values_table['Column'].iloc[i]))\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "D0kD2MfdQVO8",
        "outputId": "211a6081-9c17-4a69-9976-7b10cfc8a321"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Age 19.865319865319865\n",
            "Embarked 0.22446689113355783 CATEGORICAL\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "test = test.drop(drp, axis = 1)\n",
        "\n",
        "imputer = SimpleImputer(strategy='mean')\n",
        "test[num] = imputer.fit_transform(test[num])\n",
        "\n",
        "imputer_cat = SimpleImputer(strategy='most_frequent')\n",
        "test[cat] = imputer_cat.fit_transform(test[cat])"
      ],
      "metadata": {
        "id": "kPTizT0IPr4W"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "id = test['PassengerId']\n",
        "test = test.drop('PassengerId', axis = 1)"
      ],
      "metadata": {
        "id": "3g-o_Y5jUI0M"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "final_preds = best_model.predict(test)\n",
        "\n",
        "final = pd.DataFrame({\n",
        "    \"PassengerId\": id,\n",
        "    \"Survived\": final_preds\n",
        "})\n",
        "final.head(3)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 143
        },
        "id": "r_WNf6D4Tgjz",
        "outputId": "b13cc5d2-1c6f-4005-d7e7-46d41d1ad3f9"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "   PassengerId  Survived\n",
              "0          892         0\n",
              "1          893         1\n",
              "2          894         0"
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-f1a5c785-2814-48d4-9443-4d3851cea905\" class=\"colab-df-container\">\n",
              "    <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>PassengerId</th>\n",
              "      <th>Survived</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>892</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>893</td>\n",
              "      <td>1</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>894</td>\n",
              "      <td>0</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "    <div class=\"colab-df-buttons\">\n",
              "\n",
              "  <div class=\"colab-df-container\">\n",
              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-f1a5c785-2814-48d4-9443-4d3851cea905')\"\n",
              "            title=\"Convert this dataframe to an interactive table.\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
              "  </svg>\n",
              "    </button>\n",
              "\n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    .colab-df-buttons div {\n",
              "      margin-bottom: 4px;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "    <script>\n",
              "      const buttonEl =\n",
              "        document.querySelector('#df-f1a5c785-2814-48d4-9443-4d3851cea905 button.colab-df-convert');\n",
              "      buttonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "      async function convertToInteractive(key) {\n",
              "        const element = document.querySelector('#df-f1a5c785-2814-48d4-9443-4d3851cea905');\n",
              "        const dataTable =\n",
              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                    [key], {});\n",
              "        if (!dataTable) return;\n",
              "\n",
              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "          + ' to learn more about interactive tables.';\n",
              "        element.innerHTML = '';\n",
              "        dataTable['output_type'] = 'display_data';\n",
              "        await google.colab.output.renderOutput(dataTable, element);\n",
              "        const docLink = document.createElement('div');\n",
              "        docLink.innerHTML = docLinkHtml;\n",
              "        element.appendChild(docLink);\n",
              "      }\n",
              "    </script>\n",
              "  </div>\n",
              "\n",
              "\n",
              "<div id=\"df-1851e6e0-739c-43b2-84bb-cb7ae77cbaba\">\n",
              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-1851e6e0-739c-43b2-84bb-cb7ae77cbaba')\"\n",
              "            title=\"Suggest charts\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "     width=\"24px\">\n",
              "    <g>\n",
              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
              "    </g>\n",
              "</svg>\n",
              "  </button>\n",
              "\n",
              "<style>\n",
              "  .colab-df-quickchart {\n",
              "      --bg-color: #E8F0FE;\n",
              "      --fill-color: #1967D2;\n",
              "      --hover-bg-color: #E2EBFA;\n",
              "      --hover-fill-color: #174EA6;\n",
              "      --disabled-fill-color: #AAA;\n",
              "      --disabled-bg-color: #DDD;\n",
              "  }\n",
              "\n",
              "  [theme=dark] .colab-df-quickchart {\n",
              "      --bg-color: #3B4455;\n",
              "      --fill-color: #D2E3FC;\n",
              "      --hover-bg-color: #434B5C;\n",
              "      --hover-fill-color: #FFFFFF;\n",
              "      --disabled-bg-color: #3B4455;\n",
              "      --disabled-fill-color: #666;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart {\n",
              "    background-color: var(--bg-color);\n",
              "    border: none;\n",
              "    border-radius: 50%;\n",
              "    cursor: pointer;\n",
              "    display: none;\n",
              "    fill: var(--fill-color);\n",
              "    height: 32px;\n",
              "    padding: 0;\n",
              "    width: 32px;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart:hover {\n",
              "    background-color: var(--hover-bg-color);\n",
              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "    fill: var(--button-hover-fill-color);\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart-complete:disabled,\n",
              "  .colab-df-quickchart-complete:disabled:hover {\n",
              "    background-color: var(--disabled-bg-color);\n",
              "    fill: var(--disabled-fill-color);\n",
              "    box-shadow: none;\n",
              "  }\n",
              "\n",
              "  .colab-df-spinner {\n",
              "    border: 2px solid var(--fill-color);\n",
              "    border-color: transparent;\n",
              "    border-bottom-color: var(--fill-color);\n",
              "    animation:\n",
              "      spin 1s steps(1) infinite;\n",
              "  }\n",
              "\n",
              "  @keyframes spin {\n",
              "    0% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "      border-left-color: var(--fill-color);\n",
              "    }\n",
              "    20% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    30% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    40% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    60% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    80% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "    90% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "  }\n",
              "</style>\n",
              "\n",
              "  <script>\n",
              "    async function quickchart(key) {\n",
              "      const quickchartButtonEl =\n",
              "        document.querySelector('#' + key + ' button');\n",
              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
              "      try {\n",
              "        const charts = await google.colab.kernel.invokeFunction(\n",
              "            'suggestCharts', [key], {});\n",
              "      } catch (error) {\n",
              "        console.error('Error during call to suggestCharts:', error);\n",
              "      }\n",
              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
              "    }\n",
              "    (() => {\n",
              "      let quickchartButtonEl =\n",
              "        document.querySelector('#df-1851e6e0-739c-43b2-84bb-cb7ae77cbaba button');\n",
              "      quickchartButtonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "    })();\n",
              "  </script>\n",
              "</div>\n",
              "    </div>\n",
              "  </div>\n"
            ],
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "dataframe",
              "variable_name": "final",
              "summary": "{\n  \"name\": \"final\",\n  \"rows\": 418,\n  \"fields\": [\n    {\n      \"column\": \"PassengerId\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 120,\n        \"min\": 892,\n        \"max\": 1309,\n        \"num_unique_values\": 418,\n        \"samples\": [\n          1213,\n          1216,\n          1280\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"Survived\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0,\n        \"min\": 0,\n        \"max\": 1,\n        \"num_unique_values\": 2,\n        \"samples\": [\n          1,\n          0\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"
            }
          },
          "metadata": {},
          "execution_count": 96
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "final.to_csv('final_2.csv', index=False)"
      ],
      "metadata": {
        "id": "vJvWgfZGUGHM"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "# Classification YOLO"
      ],
      "metadata": {
        "id": "NJUwS6uF_nPO"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import pandas as pd\n",
        "import os\n",
        "from ultralytics import YOLO\n",
        "\n",
        "DATA_DIR = \"path/to/dataset\"\n",
        "TEST_DIR = \"path/to/test\"\n",
        "OUTPUT_CSV = \"submission.csv\"\n",
        "# n - s - m - l - x\n",
        "model = YOLO(\"yolov11x-cls.pt\")\n",
        "\n",
        "model.train(\n",
        "    data=DATA_DIR,\n",
        "    epochs=10,\n",
        "    imgsz=640,\n",
        "    batch=16,\n",
        "    lr0=1e-3,\n",
        "    momentum=0.9,\n",
        "    augment=True\n",
        ")\n",
        "\n",
        "model_path = \"runs/train/exp/weights/best.pt\"\n",
        "model = YOLO(model_path)\n",
        "\n",
        "test_images = [f for f in os.listdir(TEST_DIR) if f.endswith((\".jpg\", \".png\"))]\n",
        "\n",
        "predictions = []\n",
        "\n",
        "for img_name in test_images:\n",
        "    img_path = os.path.join(TEST_DIR, img_name)\n",
        "    results = model(img_path)\n",
        "\n",
        "    predicted_class = results[0].probs.top1\n",
        "    predictions.append((img_name, predicted_class))\n",
        "\n",
        "df = pd.DataFrame(predictions, columns=[\"filename\", \"class\"])\n",
        "df.to_csv(OUTPUT_CSV, index=False)\n",
        "\n",
        "print(f\"✅ Предсказания сохранены в {OUTPUT_CSV}\")\n"
      ],
      "metadata": {
        "id": "LOe1KDtKAo4H"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "# Detection YOLO"
      ],
      "metadata": {
        "id": "aAlncS4tPKyR"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "#Преобразование из csv в yaml\n",
        "import os\n",
        "import pandas as pd\n",
        "from PIL import Image\n",
        "\n",
        "images_dir = \"path/to/images\"\n",
        "csv_file = \"path/to/annotations.csv\"\n",
        "output_dir = \"path/to/yolo_format\"\n",
        "\n",
        "annotations = pd.read_csv(csv_file)\n",
        "\n",
        "def normalize_coordinates(x1, y1, x2, y2, img_width, img_height):\n",
        "    x_center = (x1 + x2) / 2 / img_width\n",
        "    y_center = (y1 + y2) / 2 / img_height\n",
        "    width = (x2 - x1) / img_width\n",
        "    height = (y2 - y1) / img_height\n",
        "    return x_center, y_center, width, height\n",
        "\n",
        "for img_name in annotations['filename'].unique():\n",
        "    img_path = os.path.join(images_dir, img_name)\n",
        "    img = Image.open(img_path)\n",
        "    img_width, img_height = img.size\n",
        "\n",
        "    txt_file = os.path.join(output_dir, img_name.replace('.jpg', '.txt').replace('.png', '.txt'))\n",
        "\n",
        "    with open(txt_file, 'w') as f:\n",
        "        img_annotations = annotations[annotations['filename'] == img_name]\n",
        "\n",
        "        for _, row in img_annotations.iterrows():\n",
        "            class_id = row['class']\n",
        "            x_center, y_center, width, height = normalize_coordinates(\n",
        "                row['x1'], row['y1'], row['x2'], row['y2'], img_width, img_height\n",
        "            )\n",
        "            f.write(f\"{class_id} {x_center} {y_center} {width} {height}\\n\")\n",
        "\n"
      ],
      "metadata": {
        "id": "2nZ4zfEcPP-N"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# dataset.yaml\n",
        "# train: path/to/dataset/images/train  # Путь к изображениям для обучения\n",
        "# val: path/to/dataset/images/val      # Путь к изображениям для валидации\n",
        "# test: path/to/dataset/images/test    # Путь к изображениям для тестирования\n",
        "\n",
        "# nc: 3  # Количество классов\n",
        "# names: ['car', 'person', 'dog']  # Имена классов\n"
      ],
      "metadata": {
        "id": "1KghBRiCQz7o"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "import os\n",
        "import pandas as pd\n",
        "from ultralytics import YOLO\n",
        "\n",
        "# === 1. ПОДГОТОВКА ДАННЫХ ===\n",
        "DATA_YAML = \"path/to/dataset.yaml\"  # Файл конфигурации датасета\n",
        "TEST_DIR = \"path/to/test\"  # Папка с тестовыми изображениями\n",
        "OUTPUT_CSV = \"submission.csv\"  # Файл для предсказаний\n",
        "\n",
        "# === 2. ОБУЧЕНИЕ YOLOv11 ===\n",
        "model = YOLO(\"yolov11n.pt\")  # Используем Nano-версию для быстрого обучения\n",
        "\n",
        "model.train(\n",
        "    data=DATA_YAML,  # Путь к файлу .yaml с разметкой\n",
        "    epochs=10,       # Количество эпох\n",
        "    imgsz=640,       # Размер входного изображения\n",
        "    batch=16,        # Размер батча\n",
        "    lr0=1e-3,        # Начальный learning rate\n",
        "    momentum=0.9,    # Стабильность градиентов\n",
        "    augment=True     # Включаем встроенные аугментации\n",
        ")\n",
        "\n",
        "model_path = \"runs/train/exp/weights/best.pt\"\n",
        "model = YOLO(model_path)\n",
        "\n",
        "test_images = [f for f in os.listdir(TEST_DIR) if f.endswith((\".jpg\", \".png\"))]\n",
        "\n",
        "predictions = []\n",
        "\n",
        "for img_name in test_images:\n",
        "    img_path = os.path.join(TEST_DIR, img_name)\n",
        "    results = model(img_path)\n",
        "\n",
        "    for result in results:\n",
        "        for box in result.boxes.data:\n",
        "            x1, y1, x2, y2, conf, cls = box.tolist()\n",
        "            predictions.append((img_name, int(cls), conf, x1, y1, x2, y2))\n",
        "\n",
        "df = pd.DataFrame(predictions, columns=[\"filename\", \"class\", \"confidence\", \"x1\", \"y1\", \"x2\", \"y2\"])\n",
        "df.to_csv(OUTPUT_CSV, index=False)"
      ],
      "metadata": {
        "id": "lDVTY5XRRwQG"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "# Segmentation YOLO"
      ],
      "metadata": {
        "id": "qpg8QGRLckvg"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import pandas as pd\n",
        "import os\n",
        "from PIL import Image\n",
        "\n",
        "csv_path = 'annotations.csv'\n",
        "images_dir = 'path/to/images'\n",
        "output_dir = 'yolo_annotations'\n",
        "\n",
        "# Создаем папку для сохранения аннотаций\n",
        "os.makedirs(output_dir, exist_ok=True)\n",
        "\n",
        "data = pd.read_csv(csv_path)\n",
        "\n",
        "def convert_to_yolo_format(image_width, image_height, x_min, y_min, x_max, y_max):\n",
        "    x_center = (x_min + x_max) / 2 / image_width\n",
        "    y_center = (y_min + y_max) / 2 / image_height\n",
        "    width = (x_max - x_min) / image_width\n",
        "    height = (y_max - y_min) / image_height\n",
        "    return x_center, y_center, width, height\n",
        "\n",
        "for image_name in os.listdir(images_dir):\n",
        "    image_path = os.path.join(images_dir, image_name)\n",
        "\n",
        "    with Image.open(image_path) as img:\n",
        "        image_width, image_height = img.size\n",
        "\n",
        "    image_annotations = data[data['image_name'] == image_name]\n",
        "\n",
        "    annotation_file = os.path.join(output_dir, os.path.splitext(image_name)[0] + '.txt')\n",
        "    with open(annotation_file, 'w') as f:\n",
        "        for _, row in image_annotations.iterrows():\n",
        "            class_id = row['class_id']\n",
        "            x_min, y_min, x_max, y_max = row['x_min'], row['y_min'], row['x_max'], row['y_max']\n",
        "\n",
        "            x_center, y_center, width, height = convert_to_yolo_format(image_width, image_height, x_min, y_min, x_max, y_max)\n",
        "\n",
        "            f.write(f\"{class_id} {x_center} {y_center} {width} {height}\\n\")\n"
      ],
      "metadata": {
        "id": "QeFxyZnicqk_"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# train: path/to/train/images\n",
        "# val: path/to/val/images\n",
        "# test: path/to/test/images\n",
        "\n",
        "# nc: 2  # Количество классов\n",
        "# names: ['class_0', 'class_1']  # Имена классов"
      ],
      "metadata": {
        "id": "BrvqznGjdW24"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "from ultralytics import YOLO\n",
        "import os\n",
        "import pandas as pd\n",
        "\n",
        "def mask_to_rle(mask):\n",
        "    mask = mask.astype(np.uint8)\n",
        "\n",
        "    rle = mask_util.encode(np.asfortranarray(mask))\n",
        "\n",
        "    rle_string = str(rle['counts'], encoding='utf-8')\n",
        "    return rle_string\n",
        "\n",
        "data_path = 'path/to/data.yaml'\n",
        "\n",
        "model = YOLO('yolo11n-seg.pt')\n",
        "\n",
        "model.train(data=data_path, epochs=50, imgsz=640, batch_size=16, augment=True)\n",
        "\n",
        "model.save('yolov11_segmentation_model.pt')\n",
        "\n",
        "test_path = 'path/to/test/images'\n",
        "results = model.predict(test_path, save=True)\n",
        "\n",
        "predictions = []\n",
        "for image_path, pred in zip(results.files, results.pred):\n",
        "    file_name = os.path.basename(image_path)\n",
        "\n",
        "    masks = pred.masks\n",
        "\n",
        "    if masks is not None:\n",
        "        for idx, mask in enumerate(masks):\n",
        "            rle_string = mask_to_rle(mask)\n",
        "            predictions.append([file_name, idx, rle_string])\n",
        "\n",
        "df = pd.DataFrame(predictions, columns=['file_name', 'object_id', 'rle'])\n",
        "df.to_csv('final.csv', index=False)\n"
      ],
      "metadata": {
        "id": "bk8iuOpChbdr"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "# Pose Detection"
      ],
      "metadata": {
        "id": "pwDRSup3lhnX"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from ultralytics import YOLO\n",
        "import os\n",
        "import pandas as pd\n",
        "import numpy as np\n",
        "from PIL import Image\n",
        "\n",
        "data_path = 'path/to/data.yaml'\n",
        "\n",
        "model = YOLO('yolov11-pose.pt')\n",
        "\n",
        "model.train(data=data_path, epochs=50, imgsz=640, batch_size=16)\n",
        "\n",
        "test_path = 'path/to/test/images'\n",
        "results = model.predict(test_path, save=True)\n",
        "\n",
        "def extract_keypoints(pred):\n",
        "    keypoints = []\n",
        "    for i, person in enumerate(pred.keypoints):\n",
        "        keypoints.append(list(person.xy))\n",
        "    return keypoints\n",
        "\n",
        "predictions = []\n",
        "for image_path, pred in zip(results.files, results.pred):\n",
        "    file_name = os.path.basename(image_path)\n",
        "\n",
        "    keypoints = extract_keypoints(pred)\n",
        "\n",
        "    if keypoints:\n",
        "        for person_id, person_keypoints in enumerate(keypoints):\n",
        "            # Сохраняем координаты точек для каждого человека\n",
        "            for idx, (x, y) in enumerate(person_keypoints):\n",
        "                predictions.append([file_name, person_id, idx, x, y])  # Добавляем все ключевые точки\n",
        "\n",
        "df = pd.DataFrame(predictions, columns=['file_name', 'person_id', 'keypoint_id', 'x', 'y'])\n",
        "df.to_csv('final.csv', index=False)\n"
      ],
      "metadata": {
        "id": "2I1qy3cmlpW-"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "# RecSys Surprise"
      ],
      "metadata": {
        "id": "7hDyA__j0_cg"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "!pip install scikit-surprise optuna"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Bvwtxw401Daf",
        "outputId": "72dffabf-8da5-4eaa-9af5-0a1297fcdf21"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Requirement already satisfied: scikit-surprise in /usr/local/lib/python3.11/dist-packages (1.1.4)\n",
            "Collecting optuna\n",
            "  Downloading optuna-4.2.1-py3-none-any.whl.metadata (17 kB)\n",
            "Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.11/dist-packages (from scikit-surprise) (1.4.2)\n",
            "Requirement already satisfied: numpy>=1.19.5 in /usr/local/lib/python3.11/dist-packages (from scikit-surprise) (1.26.4)\n",
            "Requirement already satisfied: scipy>=1.6.0 in /usr/local/lib/python3.11/dist-packages (from scikit-surprise) (1.13.1)\n",
            "Collecting alembic>=1.5.0 (from optuna)\n",
            "  Downloading alembic-1.14.1-py3-none-any.whl.metadata (7.4 kB)\n",
            "Collecting colorlog (from optuna)\n",
            "  Downloading colorlog-6.9.0-py3-none-any.whl.metadata (10 kB)\n",
            "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.11/dist-packages (from optuna) (24.2)\n",
            "Requirement already satisfied: sqlalchemy>=1.4.2 in /usr/local/lib/python3.11/dist-packages (from optuna) (2.0.38)\n",
            "Requirement already satisfied: tqdm in /usr/local/lib/python3.11/dist-packages (from optuna) (4.67.1)\n",
            "Requirement already satisfied: PyYAML in /usr/local/lib/python3.11/dist-packages (from optuna) (6.0.2)\n",
            "Collecting Mako (from alembic>=1.5.0->optuna)\n",
            "  Downloading Mako-1.3.9-py3-none-any.whl.metadata (2.9 kB)\n",
            "Requirement already satisfied: typing-extensions>=4 in /usr/local/lib/python3.11/dist-packages (from alembic>=1.5.0->optuna) (4.12.2)\n",
            "Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.11/dist-packages (from sqlalchemy>=1.4.2->optuna) (3.1.1)\n",
            "Requirement already satisfied: MarkupSafe>=0.9.2 in /usr/local/lib/python3.11/dist-packages (from Mako->alembic>=1.5.0->optuna) (3.0.2)\n",
            "Downloading optuna-4.2.1-py3-none-any.whl (383 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m383.6/383.6 kB\u001b[0m \u001b[31m12.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading alembic-1.14.1-py3-none-any.whl (233 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m233.6/233.6 kB\u001b[0m \u001b[31m19.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading colorlog-6.9.0-py3-none-any.whl (11 kB)\n",
            "Downloading Mako-1.3.9-py3-none-any.whl (78 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m78.5/78.5 kB\u001b[0m \u001b[31m5.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hInstalling collected packages: Mako, colorlog, alembic, optuna\n",
            "Successfully installed Mako-1.3.9 alembic-1.14.1 colorlog-6.9.0 optuna-4.2.1\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "import random\n",
        "import pandas as pd\n",
        "from surprise import Dataset, Reader, SVD\n",
        "from surprise.model_selection import train_test_split\n",
        "from surprise import accuracy"
      ],
      "metadata": {
        "id": "tyKsXFGU2Ikq"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "import pandas as pd\n",
        "from surprise import Dataset, Reader, SVD\n",
        "from surprise.model_selection import cross_validate, GridSearchCV\n",
        "\n",
        "# 📌 Загружаем MovieLens 100K\n",
        "data = Dataset.load_builtin('ml-100k')\n",
        "\n",
        "# 📌 Определяем параметры для Grid Search\n",
        "param_grid = {\n",
        "    'n_factors': [50, 100, 150],  # Количество латентных факторов\n",
        "    'lr_all': [0.002, 0.005, 0.01],  # Скорость обучения\n",
        "    'reg_all': [0.02, 0.05, 0.1]  # Регуляризация\n",
        "}\n",
        "\n",
        "# 📌 Запускаем Grid Search\n",
        "gs = GridSearchCV(SVD, param_grid, measures=['rmse'], cv=5, n_jobs=-1)\n",
        "gs.fit(data)\n",
        "\n",
        "# 📌 Выводим лучшие параметры\n",
        "print(f\"📊 Лучший RMSE: {gs.best_score['rmse']:.4f}\")\n",
        "print(f\"🔧 Лучшие параметры: {gs.best_params['rmse']}\")\n",
        "\n",
        "# 📌 Обучаем финальную модель с лучшими параметрами\n",
        "best_model = SVD(**gs.best_params['rmse'])\n",
        "cross_validate(best_model, data, cv=5, verbose=True)\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "_zNeD2K83qqp",
        "outputId": "529eb700-2d6a-4dde-9077-bb9ed2ab7d96"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "📊 Лучший RMSE: 0.9181\n",
            "🔧 Лучшие параметры: {'n_factors': 150, 'lr_all': 0.01, 'reg_all': 0.1}\n",
            "Evaluating RMSE, MAE of algorithm SVD on 5 split(s).\n",
            "\n",
            "                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     \n",
            "RMSE (testset)    0.9226  0.9176  0.9012  0.9308  0.9212  0.9187  0.0098  \n",
            "MAE (testset)     0.7307  0.7283  0.7118  0.7365  0.7283  0.7271  0.0082  \n",
            "Fit time          1.19    1.48    1.15    1.17    1.13    1.22    0.13    \n",
            "Test time         0.11    0.15    0.07    0.15    0.07    0.11    0.03    \n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "{'test_rmse': array([0.92262018, 0.91755708, 0.90115132, 0.93077068, 0.92121232]),\n",
              " 'test_mae': array([0.73069412, 0.72832189, 0.71182612, 0.7364805 , 0.72833285]),\n",
              " 'fit_time': (1.185208797454834,\n",
              "  1.483828067779541,\n",
              "  1.1467053890228271,\n",
              "  1.1749987602233887,\n",
              "  1.1339025497436523),\n",
              " 'test_time': (0.1124725341796875,\n",
              "  0.15291881561279297,\n",
              "  0.07221841812133789,\n",
              "  0.14911413192749023,\n",
              "  0.0736684799194336)}"
            ]
          },
          "metadata": {},
          "execution_count": 12
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "import optuna\n",
        "import pandas as pd\n",
        "from surprise import Dataset, Reader, KNNBasic\n",
        "from surprise.model_selection import cross_validate\n",
        "\n",
        "# 📌 Загружаем MovieLens 100K\n",
        "data = Dataset.load_builtin('ml-100k')\n",
        "\n",
        "# 📌 Функция для оптимизации гиперпараметров\n",
        "def objective(trial):\n",
        "    sim_options = {\n",
        "        \"name\": trial.suggest_categorical(\"name\", [\"cosine\", \"pearson\", \"msd\"]),\n",
        "        \"user_based\": trial.suggest_categorical(\"user_based\", [True, False])\n",
        "    }\n",
        "\n",
        "    model = KNNBasic(k=trial.suggest_int(\"k\", 10, 50), sim_options=sim_options)\n",
        "\n",
        "    cv_results = cross_validate(model, data, measures=[\"rmse\"], cv=3, verbose=False)\n",
        "    return cv_results[\"test_rmse\"].mean()\n",
        "\n",
        "# 📌 Оптимизация гиперпараметров с Optuna\n",
        "study = optuna.create_study(direction=\"minimize\")\n",
        "study.optimize(objective, n_trials=30)\n",
        "\n",
        "# 📌 Выводим лучшие параметры\n",
        "best_params = study.best_params\n",
        "print(f\"📊 Лучший RMSE: {study.best_value:.4f}\")\n",
        "print(f\"🔧 Лучшие параметры: {best_params}\")\n",
        "\n",
        "# 📌 Обучаем финальную модель с лучшими параметрами\n",
        "final_sim_options = {\"name\": best_params[\"name\"], \"user_based\": best_params[\"user_based\"]}\n",
        "best_model = KNNBasic(k=best_params[\"k\"], sim_options=final_sim_options)\n",
        "\n",
        "cross_validate(best_model, data, measures=[\"rmse\"], cv=5, verbose=True)\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "YyEITx6a2hi2",
        "outputId": "8d8680a7-ce25-4241-f56d-15be36497d5e"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-24 16:53:10,905] A new study created in memory with name: no-name-4ee24999-736b-4631-a413-5080a373b7b0\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Computing the pearson similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the pearson similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the pearson similarity matrix...\n",
            "Done computing similarity matrix.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-24 16:53:30,955] Trial 0 finished with value: 1.0273098011901922 and parameters: {'name': 'pearson', 'user_based': True, 'k': 19}. Best is trial 0 with value: 1.0273098011901922.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Computing the pearson similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the pearson similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the pearson similarity matrix...\n",
            "Done computing similarity matrix.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-24 16:53:40,559] Trial 1 finished with value: 1.040094723866775 and parameters: {'name': 'pearson', 'user_based': True, 'k': 11}. Best is trial 0 with value: 1.0273098011901922.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Computing the cosine similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the cosine similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the cosine similarity matrix...\n",
            "Done computing similarity matrix.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-24 16:53:53,032] Trial 2 finished with value: 1.073262901812824 and parameters: {'name': 'cosine', 'user_based': False, 'k': 16}. Best is trial 0 with value: 1.0273098011901922.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Computing the cosine similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the cosine similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the cosine similarity matrix...\n",
            "Done computing similarity matrix.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-24 16:54:06,531] Trial 3 finished with value: 1.050133624804347 and parameters: {'name': 'cosine', 'user_based': False, 'k': 25}. Best is trial 0 with value: 1.0273098011901922.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Computing the cosine similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the cosine similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the cosine similarity matrix...\n",
            "Done computing similarity matrix.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-24 16:54:18,437] Trial 4 finished with value: 1.0207003527145204 and parameters: {'name': 'cosine', 'user_based': True, 'k': 47}. Best is trial 4 with value: 1.0207003527145204.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Computing the cosine similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the cosine similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the cosine similarity matrix...\n",
            "Done computing similarity matrix.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-24 16:54:30,850] Trial 5 finished with value: 1.0596060578187558 and parameters: {'name': 'cosine', 'user_based': False, 'k': 21}. Best is trial 4 with value: 1.0207003527145204.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Computing the cosine similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the cosine similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the cosine similarity matrix...\n",
            "Done computing similarity matrix.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-24 16:54:41,313] Trial 6 finished with value: 1.035549661559487 and parameters: {'name': 'cosine', 'user_based': True, 'k': 15}. Best is trial 4 with value: 1.0207003527145204.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Computing the pearson similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the pearson similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the pearson similarity matrix...\n",
            "Done computing similarity matrix.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-24 16:54:52,725] Trial 7 finished with value: 1.0277808579160697 and parameters: {'name': 'pearson', 'user_based': True, 'k': 19}. Best is trial 4 with value: 1.0207003527145204.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-24 16:55:04,190] Trial 8 finished with value: 0.9869635347848069 and parameters: {'name': 'msd', 'user_based': True, 'k': 38}. Best is trial 8 with value: 0.9869635347848069.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-24 16:55:18,106] Trial 9 finished with value: 0.9867222892578903 and parameters: {'name': 'msd', 'user_based': False, 'k': 50}. Best is trial 9 with value: 0.9867222892578903.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-24 16:55:32,326] Trial 10 finished with value: 0.9858887823748562 and parameters: {'name': 'msd', 'user_based': False, 'k': 49}. Best is trial 10 with value: 0.9858887823748562.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-24 16:55:46,312] Trial 11 finished with value: 0.9870863575929674 and parameters: {'name': 'msd', 'user_based': False, 'k': 50}. Best is trial 10 with value: 0.9858887823748562.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-24 16:56:00,123] Trial 12 finished with value: 0.9851113891510596 and parameters: {'name': 'msd', 'user_based': False, 'k': 40}. Best is trial 12 with value: 0.9851113891510596.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-24 16:56:13,653] Trial 13 finished with value: 0.9864620349826786 and parameters: {'name': 'msd', 'user_based': False, 'k': 40}. Best is trial 12 with value: 0.9851113891510596.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-24 16:56:26,697] Trial 14 finished with value: 0.9856580660612981 and parameters: {'name': 'msd', 'user_based': False, 'k': 37}. Best is trial 12 with value: 0.9851113891510596.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-24 16:56:39,580] Trial 15 finished with value: 0.9853303657602779 and parameters: {'name': 'msd', 'user_based': False, 'k': 34}. Best is trial 12 with value: 0.9851113891510596.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-24 16:56:52,224] Trial 16 finished with value: 0.9869874201851742 and parameters: {'name': 'msd', 'user_based': False, 'k': 32}. Best is trial 12 with value: 0.9851113891510596.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-24 16:57:04,967] Trial 17 finished with value: 0.9869053043690581 and parameters: {'name': 'msd', 'user_based': False, 'k': 31}. Best is trial 12 with value: 0.9851113891510596.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-24 16:57:18,816] Trial 18 finished with value: 0.9860014872498327 and parameters: {'name': 'msd', 'user_based': False, 'k': 43}. Best is trial 12 with value: 0.9851113891510596.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-24 16:57:31,623] Trial 19 finished with value: 0.9860790732255814 and parameters: {'name': 'msd', 'user_based': False, 'k': 34}. Best is trial 12 with value: 0.9851113891510596.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Computing the pearson similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the pearson similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the pearson similarity matrix...\n",
            "Done computing similarity matrix.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-24 16:57:45,444] Trial 20 finished with value: 1.0577893280614548 and parameters: {'name': 'pearson', 'user_based': False, 'k': 28}. Best is trial 12 with value: 0.9851113891510596.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-24 16:57:58,649] Trial 21 finished with value: 0.9856411564475596 and parameters: {'name': 'msd', 'user_based': False, 'k': 36}. Best is trial 12 with value: 0.9851113891510596.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-24 16:58:12,506] Trial 22 finished with value: 0.9863498341826844 and parameters: {'name': 'msd', 'user_based': False, 'k': 43}. Best is trial 12 with value: 0.9851113891510596.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-24 16:58:25,961] Trial 23 finished with value: 0.9865147605031322 and parameters: {'name': 'msd', 'user_based': False, 'k': 35}. Best is trial 12 with value: 0.9851113891510596.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-24 16:58:38,301] Trial 24 finished with value: 0.9880187808024873 and parameters: {'name': 'msd', 'user_based': False, 'k': 27}. Best is trial 12 with value: 0.9851113891510596.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-24 16:58:51,496] Trial 25 finished with value: 0.9859721086989618 and parameters: {'name': 'msd', 'user_based': False, 'k': 43}. Best is trial 12 with value: 0.9851113891510596.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-24 16:59:04,643] Trial 26 finished with value: 0.9853186408076082 and parameters: {'name': 'msd', 'user_based': False, 'k': 40}. Best is trial 12 with value: 0.9851113891510596.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-24 16:59:17,650] Trial 27 finished with value: 0.987735307296505 and parameters: {'name': 'msd', 'user_based': False, 'k': 40}. Best is trial 12 with value: 0.9851113891510596.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Computing the pearson similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the pearson similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the pearson similarity matrix...\n",
            "Done computing similarity matrix.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-24 16:59:32,391] Trial 28 finished with value: 1.046721221705121 and parameters: {'name': 'pearson', 'user_based': False, 'k': 46}. Best is trial 12 with value: 0.9851113891510596.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[I 2025-02-24 16:59:43,336] Trial 29 finished with value: 0.9900902320723297 and parameters: {'name': 'msd', 'user_based': True, 'k': 40}. Best is trial 12 with value: 0.9851113891510596.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "📊 Лучший RMSE: 0.9851\n",
            "🔧 Лучшие параметры: {'name': 'msd', 'user_based': False, 'k': 40}\n",
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Computing the msd similarity matrix...\n",
            "Done computing similarity matrix.\n",
            "Evaluating RMSE of algorithm KNNBasic on 5 split(s).\n",
            "\n",
            "                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     \n",
            "RMSE (testset)    0.9697  0.9694  0.9790  0.9778  0.9769  0.9746  0.0041  \n",
            "Fit time          0.76    0.53    0.48    0.52    0.52    0.56    0.10    \n",
            "Test time         2.75    2.70    2.53    3.01    2.62    2.72    0.16    \n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "{'test_rmse': array([0.96973846, 0.96939912, 0.97902707, 0.97778443, 0.97694979]),\n",
              " 'fit_time': (0.7602078914642334,\n",
              "  0.5323970317840576,\n",
              "  0.48438191413879395,\n",
              "  0.5157642364501953,\n",
              "  0.5216193199157715),\n",
              " 'test_time': (2.753460168838501,\n",
              "  2.6966280937194824,\n",
              "  2.530104160308838,\n",
              "  3.0083537101745605,\n",
              "  2.6171016693115234)}"
            ]
          },
          "metadata": {},
          "execution_count": 15
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "\n",
        "  \\begin{array}{|c|c|c|c|c|c|c|c|}\\hline\\\\ \\\\\n",
        "  \\mathcal{} & Movielens 100k & RMSE & MAE & Time  \\\\ \\hline\\\\\n",
        "  a & SVD & 0.934 & 0.737 & 0:00:06   \\\\ \\hline\\\\ \\\\\n",
        "  b & SVD++ (cache_ratings=False) & 0.919 & 0.721 & 0:01:39\\\\ \\hline\\\\ \\\\\n",
        "  c & SVD++ (cache_ratings=True) & 0.919 & 0.721 & 0:01:22  \\\\ \\hline\\\\ \\\\\n",
        "  d & NMF & 0.963 & 0.758 & 0:00:06 \\\\ \\hline\\\\ \\\\\n",
        "  e & Slope One & 0.946 & 0.743 & 0:00:09 \\\\ \\hline\\\\ \\\\\n",
        "  f & k-NN & 0.98 & 0.774 & 0:00:08 \\\\ \\hline\\\\ \\\\\n",
        "  g & Slope One & 0.946 & 0.743 & 0:00:09 \\\\ \\hline\\\\ \\\\\n",
        "  h & Centered k-NN & 0.951 & 0.749 & 0:00:09 \\\\ \\hline\\\\ \\\\\n",
        "  i & k-NN Baseline & 0.931 & 0.733 & 0:00:13 \\\\ \\hline\\\\ \\\\\n",
        "  j & Co-Clustering & 0.963 & 0.753 & 0:00:06 \\\\ \\hline\\\\ \\\\\n",
        "  k & Random & 1.518 & 1.219 & 0:00:01 \\\\ \\hline\\\\ \\\\\n",
        "  l & Baseline & 0.944 & 0.748 & 0:00:02 \\\\ \\hline\n",
        "  \\end{array}\n",
        "\n",
        "\n"
      ],
      "metadata": {
        "id": "nRpCIQAxBtLW"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "# RecSys Rectools"
      ],
      "metadata": {
        "id": "DeD2dGjbMWQS"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "!pip install rectools\n",
        "!pip install rectools[all]\n",
        "!pip install rectools[torch]"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "zCse_PYDM4iu",
        "outputId": "4aaebdbd-cbfc-4a61-e5f9-bdb5e421a515"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Requirement already satisfied: rectools in /usr/local/lib/python3.11/dist-packages (0.12.0)\n",
            "Requirement already satisfied: attrs<24.0.0,>=19.1.0 in /usr/local/lib/python3.11/dist-packages (from rectools) (23.2.0)\n",
            "Requirement already satisfied: implicit<0.8.0,>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from rectools) (0.7.2)\n",
            "Requirement already satisfied: numpy<2.0.0,>=1.22 in /usr/local/lib/python3.11/dist-packages (from rectools) (1.26.4)\n",
            "Requirement already satisfied: pandas<3.0.0,>=1.5.0 in /usr/local/lib/python3.11/dist-packages (from rectools) (2.2.2)\n",
            "Requirement already satisfied: pydantic<3.0.0,>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from rectools) (2.10.6)\n",
            "Requirement already satisfied: pydantic-core<3.0.0,>=2.20.1 in /usr/local/lib/python3.11/dist-packages (from rectools) (2.27.2)\n",
            "Requirement already satisfied: scipy<1.13,>=1.10.1 in /usr/local/lib/python3.11/dist-packages (from rectools) (1.12.0)\n",
            "Requirement already satisfied: tqdm<5.0.0,>=4.27.0 in /usr/local/lib/python3.11/dist-packages (from rectools) (4.67.1)\n",
            "Requirement already satisfied: typeguard<5.0.0,>=4.1.0 in /usr/local/lib/python3.11/dist-packages (from rectools) (4.4.2)\n",
            "Requirement already satisfied: typing-extensions<5.0.0,>=4.12.2 in /usr/local/lib/python3.11/dist-packages (from rectools) (4.12.2)\n",
            "Requirement already satisfied: threadpoolctl in /usr/local/lib/python3.11/dist-packages (from implicit<0.8.0,>=0.7.1->rectools) (3.5.0)\n",
            "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3.0.0,>=1.5.0->rectools) (2.8.2)\n",
            "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3.0.0,>=1.5.0->rectools) (2025.1)\n",
            "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3.0.0,>=1.5.0->rectools) (2025.1)\n",
            "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic<3.0.0,>=2.8.2->rectools) (0.7.0)\n",
            "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas<3.0.0,>=1.5.0->rectools) (1.17.0)\n",
            "Requirement already satisfied: rectools[all] in /usr/local/lib/python3.11/dist-packages (0.12.0)\n",
            "Requirement already satisfied: attrs<24.0.0,>=19.1.0 in /usr/local/lib/python3.11/dist-packages (from rectools[all]) (23.2.0)\n",
            "Requirement already satisfied: cupy-cuda12x<14.0.0,>=13.3.0 in /usr/local/lib/python3.11/dist-packages (from rectools[all]) (13.3.0)\n",
            "Requirement already satisfied: implicit<0.8.0,>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from rectools[all]) (0.7.2)\n",
            "Requirement already satisfied: ipywidgets<8.2,>=7.7 in /usr/local/lib/python3.11/dist-packages (from rectools[all]) (7.7.1)\n",
            "Requirement already satisfied: nbformat>=4.2.0 in /usr/local/lib/python3.11/dist-packages (from rectools[all]) (5.10.4)\n",
            "Requirement already satisfied: nmslib-metabrainz<3.0.0,>=2.1.3 in /usr/local/lib/python3.11/dist-packages (from rectools[all]) (2.1.3)\n",
            "Requirement already satisfied: numpy<2.0.0,>=1.22 in /usr/local/lib/python3.11/dist-packages (from rectools[all]) (1.26.4)\n",
            "Requirement already satisfied: pandas<3.0.0,>=1.5.0 in /usr/local/lib/python3.11/dist-packages (from rectools[all]) (2.2.2)\n",
            "Requirement already satisfied: plotly<6.0.0,>=5.22.0 in /usr/local/lib/python3.11/dist-packages (from rectools[all]) (5.24.1)\n",
            "Requirement already satisfied: pydantic<3.0.0,>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from rectools[all]) (2.10.6)\n",
            "Requirement already satisfied: pydantic-core<3.0.0,>=2.20.1 in /usr/local/lib/python3.11/dist-packages (from rectools[all]) (2.27.2)\n",
            "Requirement already satisfied: pytorch-lightning<3.0.0,>=1.6.0 in /usr/local/lib/python3.11/dist-packages (from rectools[all]) (2.5.0.post0)\n",
            "Requirement already satisfied: rectools-lightfm<2.0.0,>=1.17.3 in /usr/local/lib/python3.11/dist-packages (from rectools[all]) (1.17.3)\n",
            "Requirement already satisfied: scipy<1.13,>=1.10.1 in /usr/local/lib/python3.11/dist-packages (from rectools[all]) (1.12.0)\n",
            "Requirement already satisfied: torch<3.0.0,>=1.6.0 in /usr/local/lib/python3.11/dist-packages (from rectools[all]) (2.5.1+cu124)\n",
            "Requirement already satisfied: tqdm<5.0.0,>=4.27.0 in /usr/local/lib/python3.11/dist-packages (from rectools[all]) (4.67.1)\n",
            "Requirement already satisfied: typeguard<5.0.0,>=4.1.0 in /usr/local/lib/python3.11/dist-packages (from rectools[all]) (4.4.2)\n",
            "Requirement already satisfied: typing-extensions<5.0.0,>=4.12.2 in /usr/local/lib/python3.11/dist-packages (from rectools[all]) (4.12.2)\n",
            "Requirement already satisfied: fastrlock>=0.5 in /usr/local/lib/python3.11/dist-packages (from cupy-cuda12x<14.0.0,>=13.3.0->rectools[all]) (0.8.3)\n",
            "Requirement already satisfied: threadpoolctl in /usr/local/lib/python3.11/dist-packages (from implicit<0.8.0,>=0.7.1->rectools[all]) (3.5.0)\n",
            "Requirement already satisfied: ipykernel>=4.5.1 in /usr/local/lib/python3.11/dist-packages (from ipywidgets<8.2,>=7.7->rectools[all]) (6.17.1)\n",
            "Requirement already satisfied: ipython-genutils~=0.2.0 in /usr/local/lib/python3.11/dist-packages (from ipywidgets<8.2,>=7.7->rectools[all]) (0.2.0)\n",
            "Requirement already satisfied: traitlets>=4.3.1 in /usr/local/lib/python3.11/dist-packages (from ipywidgets<8.2,>=7.7->rectools[all]) (5.7.1)\n",
            "Requirement already satisfied: widgetsnbextension~=3.6.0 in /usr/local/lib/python3.11/dist-packages (from ipywidgets<8.2,>=7.7->rectools[all]) (3.6.10)\n",
            "Requirement already satisfied: ipython>=4.0.0 in /usr/local/lib/python3.11/dist-packages (from ipywidgets<8.2,>=7.7->rectools[all]) (7.34.0)\n",
            "Requirement already satisfied: jupyterlab-widgets>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from ipywidgets<8.2,>=7.7->rectools[all]) (3.0.13)\n",
            "Requirement already satisfied: fastjsonschema>=2.15 in /usr/local/lib/python3.11/dist-packages (from nbformat>=4.2.0->rectools[all]) (2.21.1)\n",
            "Requirement already satisfied: jsonschema>=2.6 in /usr/local/lib/python3.11/dist-packages (from nbformat>=4.2.0->rectools[all]) (4.23.0)\n",
            "Requirement already satisfied: jupyter-core!=5.0.*,>=4.12 in /usr/local/lib/python3.11/dist-packages (from nbformat>=4.2.0->rectools[all]) (5.7.2)\n",
            "Requirement already satisfied: pybind11>=2.2.3 in /usr/local/lib/python3.11/dist-packages (from nmslib-metabrainz<3.0.0,>=2.1.3->rectools[all]) (2.13.6)\n",
            "Requirement already satisfied: psutil in /usr/local/lib/python3.11/dist-packages (from nmslib-metabrainz<3.0.0,>=2.1.3->rectools[all]) (5.9.5)\n",
            "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3.0.0,>=1.5.0->rectools[all]) (2.8.2)\n",
            "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3.0.0,>=1.5.0->rectools[all]) (2025.1)\n",
            "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3.0.0,>=1.5.0->rectools[all]) (2025.1)\n",
            "Requirement already satisfied: tenacity>=6.2.0 in /usr/local/lib/python3.11/dist-packages (from plotly<6.0.0,>=5.22.0->rectools[all]) (9.0.0)\n",
            "Requirement already satisfied: packaging in /usr/local/lib/python3.11/dist-packages (from plotly<6.0.0,>=5.22.0->rectools[all]) (24.2)\n",
            "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic<3.0.0,>=2.8.2->rectools[all]) (0.7.0)\n",
            "Requirement already satisfied: PyYAML>=5.4 in /usr/local/lib/python3.11/dist-packages (from pytorch-lightning<3.0.0,>=1.6.0->rectools[all]) (6.0.2)\n",
            "Requirement already satisfied: fsspec>=2022.5.0 in /usr/local/lib/python3.11/dist-packages (from fsspec[http]>=2022.5.0->pytorch-lightning<3.0.0,>=1.6.0->rectools[all]) (2024.10.0)\n",
            "Requirement already satisfied: torchmetrics>=0.7.0 in /usr/local/lib/python3.11/dist-packages (from pytorch-lightning<3.0.0,>=1.6.0->rectools[all]) (1.6.1)\n",
            "Requirement already satisfied: lightning-utilities>=0.10.0 in /usr/local/lib/python3.11/dist-packages (from pytorch-lightning<3.0.0,>=1.6.0->rectools[all]) (0.12.0)\n",
            "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from rectools-lightfm<2.0.0,>=1.17.3->rectools[all]) (2.32.3)\n",
            "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.11/dist-packages (from rectools-lightfm<2.0.0,>=1.17.3->rectools[all]) (1.6.1)\n",
            "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from torch<3.0.0,>=1.6.0->rectools[all]) (3.17.0)\n",
            "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch<3.0.0,>=1.6.0->rectools[all]) (3.4.2)\n",
            "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from torch<3.0.0,>=1.6.0->rectools[all]) (3.1.5)\n",
            "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch<3.0.0,>=1.6.0->rectools[all]) (12.4.127)\n",
            "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch<3.0.0,>=1.6.0->rectools[all]) (12.4.127)\n",
            "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch<3.0.0,>=1.6.0->rectools[all]) (12.4.127)\n",
            "Requirement already satisfied: nvidia-cudnn-cu12==9.1.0.70 in /usr/local/lib/python3.11/dist-packages (from torch<3.0.0,>=1.6.0->rectools[all]) (9.1.0.70)\n",
            "Requirement already satisfied: nvidia-cublas-cu12==12.4.5.8 in /usr/local/lib/python3.11/dist-packages (from torch<3.0.0,>=1.6.0->rectools[all]) (12.4.5.8)\n",
            "Requirement already satisfied: nvidia-cufft-cu12==11.2.1.3 in /usr/local/lib/python3.11/dist-packages (from torch<3.0.0,>=1.6.0->rectools[all]) (11.2.1.3)\n",
            "Requirement already satisfied: nvidia-curand-cu12==10.3.5.147 in /usr/local/lib/python3.11/dist-packages (from torch<3.0.0,>=1.6.0->rectools[all]) (10.3.5.147)\n",
            "Requirement already satisfied: nvidia-cusolver-cu12==11.6.1.9 in /usr/local/lib/python3.11/dist-packages (from torch<3.0.0,>=1.6.0->rectools[all]) (11.6.1.9)\n",
            "Requirement already satisfied: nvidia-cusparse-cu12==12.3.1.170 in /usr/local/lib/python3.11/dist-packages (from torch<3.0.0,>=1.6.0->rectools[all]) (12.3.1.170)\n",
            "Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch<3.0.0,>=1.6.0->rectools[all]) (2.21.5)\n",
            "Requirement already satisfied: nvidia-nvtx-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch<3.0.0,>=1.6.0->rectools[all]) (12.4.127)\n",
            "Requirement already satisfied: nvidia-nvjitlink-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch<3.0.0,>=1.6.0->rectools[all]) (12.4.127)\n",
            "Requirement already satisfied: triton==3.1.0 in /usr/local/lib/python3.11/dist-packages (from torch<3.0.0,>=1.6.0->rectools[all]) (3.1.0)\n",
            "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages (from torch<3.0.0,>=1.6.0->rectools[all]) (1.13.1)\n",
            "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch<3.0.0,>=1.6.0->rectools[all]) (1.3.0)\n",
            "Requirement already satisfied: aiohttp!=4.0.0a0,!=4.0.0a1 in /usr/local/lib/python3.11/dist-packages (from fsspec[http]>=2022.5.0->pytorch-lightning<3.0.0,>=1.6.0->rectools[all]) (3.11.12)\n",
            "Requirement already satisfied: debugpy>=1.0 in /usr/local/lib/python3.11/dist-packages (from ipykernel>=4.5.1->ipywidgets<8.2,>=7.7->rectools[all]) (1.8.0)\n",
            "Requirement already satisfied: jupyter-client>=6.1.12 in /usr/local/lib/python3.11/dist-packages (from ipykernel>=4.5.1->ipywidgets<8.2,>=7.7->rectools[all]) (6.1.12)\n",
            "Requirement already satisfied: matplotlib-inline>=0.1 in /usr/local/lib/python3.11/dist-packages (from ipykernel>=4.5.1->ipywidgets<8.2,>=7.7->rectools[all]) (0.1.7)\n",
            "Requirement already satisfied: nest-asyncio in /usr/local/lib/python3.11/dist-packages (from ipykernel>=4.5.1->ipywidgets<8.2,>=7.7->rectools[all]) (1.6.0)\n",
            "Requirement already satisfied: pyzmq>=17 in /usr/local/lib/python3.11/dist-packages (from ipykernel>=4.5.1->ipywidgets<8.2,>=7.7->rectools[all]) (24.0.1)\n",
            "Requirement already satisfied: tornado>=6.1 in /usr/local/lib/python3.11/dist-packages (from ipykernel>=4.5.1->ipywidgets<8.2,>=7.7->rectools[all]) (6.4.2)\n",
            "Requirement already satisfied: setuptools>=18.5 in /usr/local/lib/python3.11/dist-packages (from ipython>=4.0.0->ipywidgets<8.2,>=7.7->rectools[all]) (75.1.0)\n",
            "Requirement already satisfied: jedi>=0.16 in /usr/local/lib/python3.11/dist-packages (from ipython>=4.0.0->ipywidgets<8.2,>=7.7->rectools[all]) (0.19.2)\n",
            "Requirement already satisfied: decorator in /usr/local/lib/python3.11/dist-packages (from ipython>=4.0.0->ipywidgets<8.2,>=7.7->rectools[all]) (4.4.2)\n",
            "Requirement already satisfied: pickleshare in /usr/local/lib/python3.11/dist-packages (from ipython>=4.0.0->ipywidgets<8.2,>=7.7->rectools[all]) (0.7.5)\n",
            "Requirement already satisfied: prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0 in /usr/local/lib/python3.11/dist-packages (from ipython>=4.0.0->ipywidgets<8.2,>=7.7->rectools[all]) (3.0.50)\n",
            "Requirement already satisfied: pygments in /usr/local/lib/python3.11/dist-packages (from ipython>=4.0.0->ipywidgets<8.2,>=7.7->rectools[all]) (2.18.0)\n",
            "Requirement already satisfied: backcall in /usr/local/lib/python3.11/dist-packages (from ipython>=4.0.0->ipywidgets<8.2,>=7.7->rectools[all]) (0.2.0)\n",
            "Requirement already satisfied: pexpect>4.3 in /usr/local/lib/python3.11/dist-packages (from ipython>=4.0.0->ipywidgets<8.2,>=7.7->rectools[all]) (4.9.0)\n",
            "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=2.6->nbformat>=4.2.0->rectools[all]) (2024.10.1)\n",
            "Requirement already satisfied: referencing>=0.28.4 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=2.6->nbformat>=4.2.0->rectools[all]) (0.36.2)\n",
            "Requirement already satisfied: rpds-py>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from jsonschema>=2.6->nbformat>=4.2.0->rectools[all]) (0.22.3)\n",
            "Requirement already satisfied: platformdirs>=2.5 in /usr/local/lib/python3.11/dist-packages (from jupyter-core!=5.0.*,>=4.12->nbformat>=4.2.0->rectools[all]) (4.3.6)\n",
            "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas<3.0.0,>=1.5.0->rectools[all]) (1.17.0)\n",
            "Requirement already satisfied: notebook>=4.4.1 in /usr/local/lib/python3.11/dist-packages (from widgetsnbextension~=3.6.0->ipywidgets<8.2,>=7.7->rectools[all]) (6.5.5)\n",
            "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->torch<3.0.0,>=1.6.0->rectools[all]) (3.0.2)\n",
            "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->rectools-lightfm<2.0.0,>=1.17.3->rectools[all]) (3.4.1)\n",
            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->rectools-lightfm<2.0.0,>=1.17.3->rectools[all]) (3.10)\n",
            "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->rectools-lightfm<2.0.0,>=1.17.3->rectools[all]) (2.3.0)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->rectools-lightfm<2.0.0,>=1.17.3->rectools[all]) (2025.1.31)\n",
            "Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn->rectools-lightfm<2.0.0,>=1.17.3->rectools[all]) (1.4.2)\n",
            "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]>=2022.5.0->pytorch-lightning<3.0.0,>=1.6.0->rectools[all]) (2.4.6)\n",
            "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.11/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]>=2022.5.0->pytorch-lightning<3.0.0,>=1.6.0->rectools[all]) (1.3.2)\n",
            "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.11/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]>=2022.5.0->pytorch-lightning<3.0.0,>=1.6.0->rectools[all]) (1.5.0)\n",
            "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.11/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]>=2022.5.0->pytorch-lightning<3.0.0,>=1.6.0->rectools[all]) (6.1.0)\n",
            "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]>=2022.5.0->pytorch-lightning<3.0.0,>=1.6.0->rectools[all]) (0.2.1)\n",
            "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]>=2022.5.0->pytorch-lightning<3.0.0,>=1.6.0->rectools[all]) (1.18.3)\n",
            "Requirement already satisfied: parso<0.9.0,>=0.8.4 in /usr/local/lib/python3.11/dist-packages (from jedi>=0.16->ipython>=4.0.0->ipywidgets<8.2,>=7.7->rectools[all]) (0.8.4)\n",
            "Requirement already satisfied: argon2-cffi in /usr/local/lib/python3.11/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<8.2,>=7.7->rectools[all]) (23.1.0)\n",
            "Requirement already satisfied: nbconvert>=5 in /usr/local/lib/python3.11/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<8.2,>=7.7->rectools[all]) (7.16.6)\n",
            "Requirement already satisfied: Send2Trash>=1.8.0 in /usr/local/lib/python3.11/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<8.2,>=7.7->rectools[all]) (1.8.3)\n",
            "Requirement already satisfied: terminado>=0.8.3 in /usr/local/lib/python3.11/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<8.2,>=7.7->rectools[all]) (0.18.1)\n",
            "Requirement already satisfied: prometheus-client in /usr/local/lib/python3.11/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<8.2,>=7.7->rectools[all]) (0.21.1)\n",
            "Requirement already satisfied: nbclassic>=0.4.7 in /usr/local/lib/python3.11/dist-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<8.2,>=7.7->rectools[all]) (1.2.0)\n",
            "Requirement already satisfied: ptyprocess>=0.5 in /usr/local/lib/python3.11/dist-packages (from pexpect>4.3->ipython>=4.0.0->ipywidgets<8.2,>=7.7->rectools[all]) (0.7.0)\n",
            "Requirement already satisfied: wcwidth in /usr/local/lib/python3.11/dist-packages (from prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0->ipython>=4.0.0->ipywidgets<8.2,>=7.7->rectools[all]) (0.2.13)\n",
            "Requirement already satisfied: notebook-shim>=0.2.3 in /usr/local/lib/python3.11/dist-packages (from nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<8.2,>=7.7->rectools[all]) (0.2.4)\n",
            "Requirement already satisfied: beautifulsoup4 in /usr/local/lib/python3.11/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<8.2,>=7.7->rectools[all]) (4.13.3)\n",
            "Requirement already satisfied: bleach!=5.0.0 in /usr/local/lib/python3.11/dist-packages (from bleach[css]!=5.0.0->nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<8.2,>=7.7->rectools[all]) (6.2.0)\n",
            "Requirement already satisfied: defusedxml in /usr/local/lib/python3.11/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<8.2,>=7.7->rectools[all]) (0.7.1)\n",
            "Requirement already satisfied: jupyterlab-pygments in /usr/local/lib/python3.11/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<8.2,>=7.7->rectools[all]) (0.3.0)\n",
            "Requirement already satisfied: mistune<4,>=2.0.3 in /usr/local/lib/python3.11/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<8.2,>=7.7->rectools[all]) (3.1.2)\n",
            "Requirement already satisfied: nbclient>=0.5.0 in /usr/local/lib/python3.11/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<8.2,>=7.7->rectools[all]) (0.10.2)\n",
            "Requirement already satisfied: pandocfilters>=1.4.1 in /usr/local/lib/python3.11/dist-packages (from nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<8.2,>=7.7->rectools[all]) (1.5.1)\n",
            "Requirement already satisfied: argon2-cffi-bindings in /usr/local/lib/python3.11/dist-packages (from argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<8.2,>=7.7->rectools[all]) (21.2.0)\n",
            "Requirement already satisfied: webencodings in /usr/local/lib/python3.11/dist-packages (from bleach!=5.0.0->bleach[css]!=5.0.0->nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<8.2,>=7.7->rectools[all]) (0.5.1)\n",
            "Requirement already satisfied: tinycss2<1.5,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from bleach[css]!=5.0.0->nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<8.2,>=7.7->rectools[all]) (1.4.0)\n",
            "Requirement already satisfied: jupyter-server<3,>=1.8 in /usr/local/lib/python3.11/dist-packages (from notebook-shim>=0.2.3->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<8.2,>=7.7->rectools[all]) (1.24.0)\n",
            "Requirement already satisfied: cffi>=1.0.1 in /usr/local/lib/python3.11/dist-packages (from argon2-cffi-bindings->argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<8.2,>=7.7->rectools[all]) (1.17.1)\n",
            "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.11/dist-packages (from beautifulsoup4->nbconvert>=5->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<8.2,>=7.7->rectools[all]) (2.6)\n",
            "Requirement already satisfied: pycparser in /usr/local/lib/python3.11/dist-packages (from cffi>=1.0.1->argon2-cffi-bindings->argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<8.2,>=7.7->rectools[all]) (2.22)\n",
            "Requirement already satisfied: anyio<4,>=3.1.0 in /usr/local/lib/python3.11/dist-packages (from jupyter-server<3,>=1.8->notebook-shim>=0.2.3->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<8.2,>=7.7->rectools[all]) (3.7.1)\n",
            "Requirement already satisfied: websocket-client in /usr/local/lib/python3.11/dist-packages (from jupyter-server<3,>=1.8->notebook-shim>=0.2.3->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<8.2,>=7.7->rectools[all]) (1.8.0)\n",
            "Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.11/dist-packages (from anyio<4,>=3.1.0->jupyter-server<3,>=1.8->notebook-shim>=0.2.3->nbclassic>=0.4.7->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets<8.2,>=7.7->rectools[all]) (1.3.1)\n",
            "Requirement already satisfied: rectools[torch] in /usr/local/lib/python3.11/dist-packages (0.12.0)\n",
            "Requirement already satisfied: attrs<24.0.0,>=19.1.0 in /usr/local/lib/python3.11/dist-packages (from rectools[torch]) (23.2.0)\n",
            "Requirement already satisfied: implicit<0.8.0,>=0.7.1 in /usr/local/lib/python3.11/dist-packages (from rectools[torch]) (0.7.2)\n",
            "Requirement already satisfied: numpy<2.0.0,>=1.22 in /usr/local/lib/python3.11/dist-packages (from rectools[torch]) (1.26.4)\n",
            "Requirement already satisfied: pandas<3.0.0,>=1.5.0 in /usr/local/lib/python3.11/dist-packages (from rectools[torch]) (2.2.2)\n",
            "Requirement already satisfied: pydantic<3.0.0,>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from rectools[torch]) (2.10.6)\n",
            "Requirement already satisfied: pydantic-core<3.0.0,>=2.20.1 in /usr/local/lib/python3.11/dist-packages (from rectools[torch]) (2.27.2)\n",
            "Requirement already satisfied: pytorch-lightning<3.0.0,>=1.6.0 in /usr/local/lib/python3.11/dist-packages (from rectools[torch]) (2.5.0.post0)\n",
            "Requirement already satisfied: scipy<1.13,>=1.10.1 in /usr/local/lib/python3.11/dist-packages (from rectools[torch]) (1.12.0)\n",
            "Requirement already satisfied: torch<3.0.0,>=1.6.0 in /usr/local/lib/python3.11/dist-packages (from rectools[torch]) (2.5.1+cu124)\n",
            "Requirement already satisfied: tqdm<5.0.0,>=4.27.0 in /usr/local/lib/python3.11/dist-packages (from rectools[torch]) (4.67.1)\n",
            "Requirement already satisfied: typeguard<5.0.0,>=4.1.0 in /usr/local/lib/python3.11/dist-packages (from rectools[torch]) (4.4.2)\n",
            "Requirement already satisfied: typing-extensions<5.0.0,>=4.12.2 in /usr/local/lib/python3.11/dist-packages (from rectools[torch]) (4.12.2)\n",
            "Requirement already satisfied: threadpoolctl in /usr/local/lib/python3.11/dist-packages (from implicit<0.8.0,>=0.7.1->rectools[torch]) (3.5.0)\n",
            "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas<3.0.0,>=1.5.0->rectools[torch]) (2.8.2)\n",
            "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas<3.0.0,>=1.5.0->rectools[torch]) (2025.1)\n",
            "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas<3.0.0,>=1.5.0->rectools[torch]) (2025.1)\n",
            "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic<3.0.0,>=2.8.2->rectools[torch]) (0.7.0)\n",
            "Requirement already satisfied: PyYAML>=5.4 in /usr/local/lib/python3.11/dist-packages (from pytorch-lightning<3.0.0,>=1.6.0->rectools[torch]) (6.0.2)\n",
            "Requirement already satisfied: fsspec>=2022.5.0 in /usr/local/lib/python3.11/dist-packages (from fsspec[http]>=2022.5.0->pytorch-lightning<3.0.0,>=1.6.0->rectools[torch]) (2024.10.0)\n",
            "Requirement already satisfied: torchmetrics>=0.7.0 in /usr/local/lib/python3.11/dist-packages (from pytorch-lightning<3.0.0,>=1.6.0->rectools[torch]) (1.6.1)\n",
            "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.11/dist-packages (from pytorch-lightning<3.0.0,>=1.6.0->rectools[torch]) (24.2)\n",
            "Requirement already satisfied: lightning-utilities>=0.10.0 in /usr/local/lib/python3.11/dist-packages (from pytorch-lightning<3.0.0,>=1.6.0->rectools[torch]) (0.12.0)\n",
            "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from torch<3.0.0,>=1.6.0->rectools[torch]) (3.17.0)\n",
            "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch<3.0.0,>=1.6.0->rectools[torch]) (3.4.2)\n",
            "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from torch<3.0.0,>=1.6.0->rectools[torch]) (3.1.5)\n",
            "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch<3.0.0,>=1.6.0->rectools[torch]) (12.4.127)\n",
            "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch<3.0.0,>=1.6.0->rectools[torch]) (12.4.127)\n",
            "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch<3.0.0,>=1.6.0->rectools[torch]) (12.4.127)\n",
            "Requirement already satisfied: nvidia-cudnn-cu12==9.1.0.70 in /usr/local/lib/python3.11/dist-packages (from torch<3.0.0,>=1.6.0->rectools[torch]) (9.1.0.70)\n",
            "Requirement already satisfied: nvidia-cublas-cu12==12.4.5.8 in /usr/local/lib/python3.11/dist-packages (from torch<3.0.0,>=1.6.0->rectools[torch]) (12.4.5.8)\n",
            "Requirement already satisfied: nvidia-cufft-cu12==11.2.1.3 in /usr/local/lib/python3.11/dist-packages (from torch<3.0.0,>=1.6.0->rectools[torch]) (11.2.1.3)\n",
            "Requirement already satisfied: nvidia-curand-cu12==10.3.5.147 in /usr/local/lib/python3.11/dist-packages (from torch<3.0.0,>=1.6.0->rectools[torch]) (10.3.5.147)\n",
            "Requirement already satisfied: nvidia-cusolver-cu12==11.6.1.9 in /usr/local/lib/python3.11/dist-packages (from torch<3.0.0,>=1.6.0->rectools[torch]) (11.6.1.9)\n",
            "Requirement already satisfied: nvidia-cusparse-cu12==12.3.1.170 in /usr/local/lib/python3.11/dist-packages (from torch<3.0.0,>=1.6.0->rectools[torch]) (12.3.1.170)\n",
            "Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch<3.0.0,>=1.6.0->rectools[torch]) (2.21.5)\n",
            "Requirement already satisfied: nvidia-nvtx-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch<3.0.0,>=1.6.0->rectools[torch]) (12.4.127)\n",
            "Requirement already satisfied: nvidia-nvjitlink-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch<3.0.0,>=1.6.0->rectools[torch]) (12.4.127)\n",
            "Requirement already satisfied: triton==3.1.0 in /usr/local/lib/python3.11/dist-packages (from torch<3.0.0,>=1.6.0->rectools[torch]) (3.1.0)\n",
            "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages (from torch<3.0.0,>=1.6.0->rectools[torch]) (1.13.1)\n",
            "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch<3.0.0,>=1.6.0->rectools[torch]) (1.3.0)\n",
            "Requirement already satisfied: aiohttp!=4.0.0a0,!=4.0.0a1 in /usr/local/lib/python3.11/dist-packages (from fsspec[http]>=2022.5.0->pytorch-lightning<3.0.0,>=1.6.0->rectools[torch]) (3.11.12)\n",
            "Requirement already satisfied: setuptools in /usr/local/lib/python3.11/dist-packages (from lightning-utilities>=0.10.0->pytorch-lightning<3.0.0,>=1.6.0->rectools[torch]) (75.1.0)\n",
            "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas<3.0.0,>=1.5.0->rectools[torch]) (1.17.0)\n",
            "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->torch<3.0.0,>=1.6.0->rectools[torch]) (3.0.2)\n",
            "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]>=2022.5.0->pytorch-lightning<3.0.0,>=1.6.0->rectools[torch]) (2.4.6)\n",
            "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.11/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]>=2022.5.0->pytorch-lightning<3.0.0,>=1.6.0->rectools[torch]) (1.3.2)\n",
            "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.11/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]>=2022.5.0->pytorch-lightning<3.0.0,>=1.6.0->rectools[torch]) (1.5.0)\n",
            "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.11/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]>=2022.5.0->pytorch-lightning<3.0.0,>=1.6.0->rectools[torch]) (6.1.0)\n",
            "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]>=2022.5.0->pytorch-lightning<3.0.0,>=1.6.0->rectools[torch]) (0.2.1)\n",
            "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]>=2022.5.0->pytorch-lightning<3.0.0,>=1.6.0->rectools[torch]) (1.18.3)\n",
            "Requirement already satisfied: idna>=2.0 in /usr/local/lib/python3.11/dist-packages (from yarl<2.0,>=1.17.0->aiohttp!=4.0.0a0,!=4.0.0a1->fsspec[http]>=2022.5.0->pytorch-lightning<3.0.0,>=1.6.0->rectools[torch]) (3.10)\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "'''По факту от пользователя требуется обычная таблица, где каждая строка отражает одно взаимодействие: в первом столбце id юзера, во втором — айтема,\n",
        " а в третьем — скор взаимодействия (например, купил/ не купил). Если есть данные по времени, их тоже можно добавить. '''"
      ],
      "metadata": {
        "id": "lU1LEYZONAHR"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "import numpy as np\n",
        "import pandas as pd\n",
        "\n",
        "from implicit.nearest_neighbours import TFIDFRecommender\n",
        "\n",
        "from rectools import Columns\n",
        "from rectools.dataset import Dataset\n",
        "from rectools.metrics import (\n",
        "    Precision,\n",
        "    NDCG,\n",
        "    AvgRecPopularity,\n",
        "    Intersection,\n",
        "    HitRate,\n",
        "    SufficientReco,\n",
        "    DebiasConfig,\n",
        "    IntraListDiversity,\n",
        "    Serendipity,\n",
        "    calc_metrics,\n",
        ")\n",
        "from rectools.metrics.distances import PairwiseHammingDistanceCalculator\n",
        "from rectools.models import *"
      ],
      "metadata": {
        "id": "XdEtxqrdRX7H"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "ratings = pd.read_csv(\n",
        "    \"ratings.dat\",\n",
        "    sep=\"::\",\n",
        "    engine=\"python\",  # Because of 2-chars separators\n",
        "    header=None,\n",
        "    names=[Columns.User, Columns.Item, Columns.Weight, Columns.Datetime],\n",
        ")\n",
        "print(ratings.shape)\n",
        "ratings.head()"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 223
        },
        "id": "9rkswbQDRg5B",
        "outputId": "74bb481e-157e-4159-ec70-ec7606ad6041"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "(1000209, 4)\n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "   user_id  item_id  weight   datetime\n",
              "0        1     1193       5  978300760\n",
              "1        1      661       3  978302109\n",
              "2        1      914       3  978301968\n",
              "3        1     3408       4  978300275\n",
              "4        1     2355       5  978824291"
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-8c025fcc-d8aa-45f3-88e9-0fe4862f2838\" class=\"colab-df-container\">\n",
              "    <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>user_id</th>\n",
              "      <th>item_id</th>\n",
              "      <th>weight</th>\n",
              "      <th>datetime</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>1</td>\n",
              "      <td>1193</td>\n",
              "      <td>5</td>\n",
              "      <td>978300760</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>1</td>\n",
              "      <td>661</td>\n",
              "      <td>3</td>\n",
              "      <td>978302109</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>1</td>\n",
              "      <td>914</td>\n",
              "      <td>3</td>\n",
              "      <td>978301968</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>1</td>\n",
              "      <td>3408</td>\n",
              "      <td>4</td>\n",
              "      <td>978300275</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>1</td>\n",
              "      <td>2355</td>\n",
              "      <td>5</td>\n",
              "      <td>978824291</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "    <div class=\"colab-df-buttons\">\n",
              "\n",
              "  <div class=\"colab-df-container\">\n",
              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-8c025fcc-d8aa-45f3-88e9-0fe4862f2838')\"\n",
              "            title=\"Convert this dataframe to an interactive table.\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
              "  </svg>\n",
              "    </button>\n",
              "\n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    .colab-df-buttons div {\n",
              "      margin-bottom: 4px;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "    <script>\n",
              "      const buttonEl =\n",
              "        document.querySelector('#df-8c025fcc-d8aa-45f3-88e9-0fe4862f2838 button.colab-df-convert');\n",
              "      buttonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "      async function convertToInteractive(key) {\n",
              "        const element = document.querySelector('#df-8c025fcc-d8aa-45f3-88e9-0fe4862f2838');\n",
              "        const dataTable =\n",
              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                    [key], {});\n",
              "        if (!dataTable) return;\n",
              "\n",
              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "          + ' to learn more about interactive tables.';\n",
              "        element.innerHTML = '';\n",
              "        dataTable['output_type'] = 'display_data';\n",
              "        await google.colab.output.renderOutput(dataTable, element);\n",
              "        const docLink = document.createElement('div');\n",
              "        docLink.innerHTML = docLinkHtml;\n",
              "        element.appendChild(docLink);\n",
              "      }\n",
              "    </script>\n",
              "  </div>\n",
              "\n",
              "\n",
              "<div id=\"df-16fbc93f-8376-4bcf-b49a-299a438d26ba\">\n",
              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-16fbc93f-8376-4bcf-b49a-299a438d26ba')\"\n",
              "            title=\"Suggest charts\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "     width=\"24px\">\n",
              "    <g>\n",
              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
              "    </g>\n",
              "</svg>\n",
              "  </button>\n",
              "\n",
              "<style>\n",
              "  .colab-df-quickchart {\n",
              "      --bg-color: #E8F0FE;\n",
              "      --fill-color: #1967D2;\n",
              "      --hover-bg-color: #E2EBFA;\n",
              "      --hover-fill-color: #174EA6;\n",
              "      --disabled-fill-color: #AAA;\n",
              "      --disabled-bg-color: #DDD;\n",
              "  }\n",
              "\n",
              "  [theme=dark] .colab-df-quickchart {\n",
              "      --bg-color: #3B4455;\n",
              "      --fill-color: #D2E3FC;\n",
              "      --hover-bg-color: #434B5C;\n",
              "      --hover-fill-color: #FFFFFF;\n",
              "      --disabled-bg-color: #3B4455;\n",
              "      --disabled-fill-color: #666;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart {\n",
              "    background-color: var(--bg-color);\n",
              "    border: none;\n",
              "    border-radius: 50%;\n",
              "    cursor: pointer;\n",
              "    display: none;\n",
              "    fill: var(--fill-color);\n",
              "    height: 32px;\n",
              "    padding: 0;\n",
              "    width: 32px;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart:hover {\n",
              "    background-color: var(--hover-bg-color);\n",
              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "    fill: var(--button-hover-fill-color);\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart-complete:disabled,\n",
              "  .colab-df-quickchart-complete:disabled:hover {\n",
              "    background-color: var(--disabled-bg-color);\n",
              "    fill: var(--disabled-fill-color);\n",
              "    box-shadow: none;\n",
              "  }\n",
              "\n",
              "  .colab-df-spinner {\n",
              "    border: 2px solid var(--fill-color);\n",
              "    border-color: transparent;\n",
              "    border-bottom-color: var(--fill-color);\n",
              "    animation:\n",
              "      spin 1s steps(1) infinite;\n",
              "  }\n",
              "\n",
              "  @keyframes spin {\n",
              "    0% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "      border-left-color: var(--fill-color);\n",
              "    }\n",
              "    20% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    30% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    40% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    60% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    80% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "    90% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "  }\n",
              "</style>\n",
              "\n",
              "  <script>\n",
              "    async function quickchart(key) {\n",
              "      const quickchartButtonEl =\n",
              "        document.querySelector('#' + key + ' button');\n",
              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
              "      try {\n",
              "        const charts = await google.colab.kernel.invokeFunction(\n",
              "            'suggestCharts', [key], {});\n",
              "      } catch (error) {\n",
              "        console.error('Error during call to suggestCharts:', error);\n",
              "      }\n",
              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
              "    }\n",
              "    (() => {\n",
              "      let quickchartButtonEl =\n",
              "        document.querySelector('#df-16fbc93f-8376-4bcf-b49a-299a438d26ba button');\n",
              "      quickchartButtonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "    })();\n",
              "  </script>\n",
              "</div>\n",
              "    </div>\n",
              "  </div>\n"
            ],
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "dataframe",
              "variable_name": "ratings"
            }
          },
          "metadata": {},
          "execution_count": 5
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "ratings[\"datetime\"] = pd.to_datetime(ratings[\"datetime\"] * 10 ** 9)\n",
        "ratings[\"datetime\"].min(), ratings[\"datetime\"].max()"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "H1Em3kpYRksu",
        "outputId": "8ddf600d-16e2-403e-8497-1de5ebedf264"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "(Timestamp('2000-04-25 23:05:32'), Timestamp('2003-02-28 17:49:50'))"
            ]
          },
          "metadata": {},
          "execution_count": 6
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "movies = pd.read_csv(\n",
        "    \"movies.dat\",\n",
        "    sep=\"::\",\n",
        "    engine=\"python\",  # Because of 2-chars separators\n",
        "    header=None,\n",
        "    names=[Columns.Item, \"title\", \"genres\"],\n",
        "    encoding_errors=\"ignore\",\n",
        ")\n",
        "print(movies.shape)\n",
        "split_dt = pd.Timestamp(\"2003-02-01\")\n",
        "df_train = ratings.loc[ratings[\"datetime\"] < split_dt]\n",
        "df_test = ratings.loc[ratings[\"datetime\"] >= split_dt]\n",
        "dataset = Dataset.construct(df_train)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "-bR-lgPXSXvA",
        "outputId": "a5e079e4-143f-4a6c-b4c5-b23f56f4d7c2"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "(3883, 3)\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "from rectools.models import LightFMWrapperModel\n",
        "from lightfm import LightFM\n",
        "\n",
        "model = LightFMWrapperModel(\n",
        "        # внутри модели указываем параметр no_components\n",
        "        # это размезность эмбеддингов, которые выучит модель\n",
        "        model=LightFM(no_components = 30)\n",
        "        )\n",
        "\n",
        "model.fit(dataset)\n",
        "recos = model.recommend(\n",
        "    users=ratings[Columns.User].unique(),\n",
        "    dataset=dataset,\n",
        "    k=5,\n",
        "    filter_viewed=True,\n",
        ")\n",
        "\n",
        "serendipity = Serendipity(k=10)\n",
        "precision = Precision(k=10, r_precision=True)  # r_precision means division by min(k, n_user_test_items)\n",
        "ndcg = NDCG(k=10, log_base=3)\n",
        "\n",
        "movies[\"genre\"] = movies[\"genres\"].str.split(\"|\")\n",
        "genre_exploded = movies[[\"item_id\", \"genre\"]].set_index(\"item_id\").explode(\"genre\")\n",
        "genre_dummies = pd.get_dummies(genre_exploded, prefix=\"\", prefix_sep=\"\").groupby(\"item_id\").sum()\n",
        "\n",
        "precision_value = precision.calc(reco=recos, interactions=df_test)\n",
        "print(f\"precision: {precision_value}\")\n",
        "\n",
        "catalog = df_train[Columns.Item].unique()\n",
        "\n",
        "serendipity_value = serendipity.calc(\n",
        "    reco=recos,\n",
        "    interactions=df_test,\n",
        "    prev_interactions=df_train,\n",
        "    catalog=catalog\n",
        ")\n",
        "print(\"Serendipity: \", serendipity_value)\n",
        "\n",
        "print(\"NDCG: \", ndcg.calc(reco=recos, interactions=df_test))\n",
        "\n",
        "distance_calculator = PairwiseHammingDistanceCalculator(genre_dummies)\n",
        "ild = IntraListDiversity(k=10, distance_calculator=distance_calculator)\n",
        "print(\"ILD: \", ild.calc(reco=recos))"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "c3yLXDPcfcS6",
        "outputId": "07f3b781-b6d6-4c20-daf8-6b89c3d17c75"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "precision: 0.032996632996633\n",
            "Serendipity:  4.163643706464913e-06\n",
            "NDCG:  0.041591868823295505\n",
            "ILD:  3.4595364238410595\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "model = SASRecModel(\n",
        "    session_max_len=20,\n",
        "    loss=\"softmax\",\n",
        "    n_factors=64,\n",
        "    n_blocks=1,\n",
        "    n_heads=4,\n",
        "    dropout_rate=0.2,\n",
        "    lr=0.001,\n",
        "    batch_size=128,\n",
        "    epochs=6,\n",
        "    verbose=1,\n",
        "    deterministic=True,\n",
        ")\n",
        "\n",
        "model.fit(dataset)\n",
        "recos = model.recommend(\n",
        "    users=ratings[Columns.User].unique(),\n",
        "    dataset=dataset,\n",
        "    k=5,\n",
        "    filter_viewed=True,\n",
        ")\n",
        "\n",
        "serendipity = Serendipity(k=10)\n",
        "precision = Precision(k=10, r_precision=True)  # r_precision means division by min(k, n_user_test_items)\n",
        "ndcg = NDCG(k=10, log_base=3)\n",
        "\n",
        "movies[\"genre\"] = movies[\"genres\"].str.split(\"|\")\n",
        "genre_exploded = movies[[\"item_id\", \"genre\"]].set_index(\"item_id\").explode(\"genre\")\n",
        "genre_dummies = pd.get_dummies(genre_exploded, prefix=\"\", prefix_sep=\"\").groupby(\"item_id\").sum()\n",
        "\n",
        "precision_value = precision.calc(reco=recos, interactions=df_test)\n",
        "print(f\"precision: {precision_value}\")\n",
        "\n",
        "catalog = df_train[Columns.Item].unique()\n",
        "\n",
        "serendipity_value = serendipity.calc(\n",
        "    reco=recos,\n",
        "    interactions=df_test,\n",
        "    prev_interactions=df_train,\n",
        "    catalog=catalog\n",
        ")\n",
        "print(\"Serendipity: \", serendipity_value)\n",
        "\n",
        "print(\"NDCG: \", ndcg.calc(reco=recos, interactions=df_test))\n",
        "\n",
        "distance_calculator = PairwiseHammingDistanceCalculator(genre_dummies)\n",
        "ild = IntraListDiversity(k=10, distance_calculator=distance_calculator)\n",
        "print(\"ILD: \", ild.calc(reco=recos))"
      ],
      "metadata": {
        "id": "ufAFUYjdjXZx",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 555,
          "referenced_widgets": [
            "894f0647a28f4449a63e680841518b0d",
            "707e977651d6492082bb6615ba6b7d6e",
            "732150fa2e1a43c7aa61719cb083d6ac",
            "86eae60d0e184e27a6d1f73bc7f37560",
            "8ad5f7a38f9d4b4780a3a45a9842a56d",
            "fb6e2cca3ca84dafbf09599481314a2a",
            "3188c60873aa4d2faa5be0ea04ea3e09",
            "befb029ec4814bf8b12ed8b7c222d8ab",
            "f801d767b2d446229f30c418604c6d84",
            "b459c7018ee94121891d27687ba5ae59",
            "d1dad85ea0ae46fab93b0cc0cbc86310"
          ]
        },
        "outputId": "fc606408-dce5-4aba-ac04-568c1261096f"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False\n",
            "INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores\n",
            "INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs\n",
            "/usr/local/lib/python3.11/dist-packages/rectools/dataset/identifiers.py:60: FutureWarning: unique with argument that is not not a Series, Index, ExtensionArray, or np.ndarray is deprecated and will raise in a future version.\n",
            "  unq_values = pd.unique(values)\n",
            "/usr/local/lib/python3.11/dist-packages/rectools/models/nn/item_net.py:129: UserWarning: Ignoring `CatFeaturesItemNet` block because dataset doesn't contain item features.\n",
            "  warnings.warn(explanation)\n",
            "/usr/local/lib/python3.11/dist-packages/pydantic/main.py:426: UserWarning: Pydantic serializer warnings:\n",
            "  Expected `str` but got `tuple` with value `('rectools.models.nn.item...net.CatFeaturesItemNet')` - serialized value may not be as expected\n",
            "  return self.__pydantic_serializer__.to_python(\n",
            "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.\n",
            "INFO:pytorch_lightning.callbacks.model_summary:\n",
            "  | Name        | Type                     | Params | Mode \n",
            "-----------------------------------------------------------------\n",
            "0 | torch_model | TransformerTorchBackbone | 242 K  | train\n",
            "-----------------------------------------------------------------\n",
            "242 K     Trainable params\n",
            "0         Non-trainable params\n",
            "242 K     Total params\n",
            "0.972     Total estimated model params size (MB)\n",
            "23        Modules in train mode\n",
            "0         Modules in eval mode\n",
            "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/loops/fit_loop.py:310: The number of training batches (48) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "Training: |          | 0/? [00:00<?, ?it/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "894f0647a28f4449a63e680841518b0d"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=6` reached.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "precision: 0.04781144781144782\n",
            "Serendipity:  4.502274814780399e-05\n",
            "NDCG:  0.03513844945033891\n",
            "ILD:  3.287748344370861\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "model = BERT4RecModel(\n",
        "    mask_prob=0.15,  # specify probability of masking tokens\n",
        "    deterministic=True,\n",
        ")\n",
        "\n",
        "model.fit(dataset)\n",
        "recos = model.recommend(\n",
        "    users=ratings[Columns.User].unique(),\n",
        "    dataset=dataset,\n",
        "    k=5,\n",
        "    filter_viewed=True,\n",
        ")\n",
        "\n",
        "serendipity = Serendipity(k=10)\n",
        "precision = Precision(k=10, r_precision=True)  # r_precision means division by min(k, n_user_test_items)\n",
        "ndcg = NDCG(k=10, log_base=3)\n",
        "\n",
        "movies[\"genre\"] = movies[\"genres\"].str.split(\"|\")\n",
        "genre_exploded = movies[[\"item_id\", \"genre\"]].set_index(\"item_id\").explode(\"genre\")\n",
        "genre_dummies = pd.get_dummies(genre_exploded, prefix=\"\", prefix_sep=\"\").groupby(\"item_id\").sum()\n",
        "\n",
        "precision_value = precision.calc(reco=recos, interactions=df_test)\n",
        "print(f\"precision: {precision_value}\")\n",
        "\n",
        "catalog = df_train[Columns.Item].unique()\n",
        "\n",
        "serendipity_value = serendipity.calc(\n",
        "    reco=recos,\n",
        "    interactions=df_test,\n",
        "    prev_interactions=df_train,\n",
        "    catalog=catalog\n",
        ")\n",
        "print(\"Serendipity: \", serendipity_value)\n",
        "\n",
        "print(\"NDCG: \", ndcg.calc(reco=recos, interactions=df_test))\n",
        "\n",
        "distance_calculator = PairwiseHammingDistanceCalculator(genre_dummies)\n",
        "ild = IntraListDiversity(k=10, distance_calculator=distance_calculator)\n",
        "print(\"ILD: \", ild.calc(reco=recos))"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "PL_0puloZAbQ",
        "outputId": "7f563b85-dcb7-4194-a3ba-cfdb00980806"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False\n",
            "INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores\n",
            "INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs\n",
            "/usr/local/lib/python3.11/dist-packages/rectools/dataset/identifiers.py:60: FutureWarning: unique with argument that is not not a Series, Index, ExtensionArray, or np.ndarray is deprecated and will raise in a future version.\n",
            "  unq_values = pd.unique(values)\n",
            "/usr/local/lib/python3.11/dist-packages/rectools/models/nn/item_net.py:129: UserWarning: Ignoring `CatFeaturesItemNet` block because dataset doesn't contain item features.\n",
            "  warnings.warn(explanation)\n",
            "/usr/local/lib/python3.11/dist-packages/pydantic/main.py:426: UserWarning: Pydantic serializer warnings:\n",
            "  Expected `str` but got `tuple` with value `('rectools.models.nn.item...net.CatFeaturesItemNet')` - serialized value may not be as expected\n",
            "  return self.__pydantic_serializer__.to_python(\n",
            "/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/configuration_validator.py:70: You defined a `validation_step` but have no `val_dataloader`. Skipping val loop.\n",
            "INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=3` reached.\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "precision: 0.02630270963604297\n",
            "Serendipity:  1.1552771056779242e-05\n",
            "NDCG:  0.03232912926419424\n",
            "ILD:  3.031059602649006\n"
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "\n",
        "  \\begin{array}{|c|c|c|c|c|c|c|c|}\\hline\\\\ \\\\\n",
        "  \\mathcal{} & Movielens 1m & Precision & Serendipity & NDCG & ILD  \\\\ \\hline\\\\\n",
        "  a & SASRec (6 epochs) & 0.04781 & 4.5023e-05 & 0.03513 & 3.2877   \\\\ \\hline\\\\ \\\\\n",
        "  b & BERT4Rec & 0.02630 & 1.1553e-05 & 0.03233 & 3.0311 \\\\ \\hline\\\\ \\\\\n",
        "  c & implicit ALS Wrapper & 0.0515 & 5.4056e-05 & 0.05165 & 2.8392 \\\\ \\hline\\\\ \\\\\n",
        "  d & implicit BPR-MF Wrapper & 0.02997 & 3.19868e-06 & 0.03615 & 3.86506 \\\\ \\hline\\\\ \\\\\n",
        "  e & implicit ItemKNN Wrapper & 0.0456 & 3.0093e-05 & 0.04615 & 3.1726 \\\\ \\hline\\\\ \\\\\n",
        "  f & LightFM Wrapper & 0.05858 & 4.7578e-06 & 0.0604 & 3.6395 \\\\ \\hline\\\\ \\\\\n",
        "  g & EASE & 0.0367 & 3.0522e-05 & 0.03431 & 2.8200 \\\\ \\hline\\\\ \\\\\n",
        "  h & PureSVD & 0.05952 & 2.5205e-05 & 0.05248 & 3.0020 \\\\ \\hline\\\\ \\\\\n",
        "  j & Popular & 0.0330 & 4.1636e-06 & 0.04160 & 3.4595 \\\\ \\hline\\\\ \\\\\n",
        "  l & Random & 0.0131 & 1.6940e-05 & 0.00487 & 2.6273 \\\\ \\hline\n",
        "  \\end{array}"
      ],
      "metadata": {
        "id": "bmPZPxd8cQ0o"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "# Детекция ботов"
      ],
      "metadata": {
        "id": "MnzenpVK1RUw"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import numpy as np\n",
        "import pandas as pd\n",
        "\n",
        "from implicit.nearest_neighbours import TFIDFRecommender\n",
        "\n",
        "from rectools import Columns\n",
        "from rectools.dataset import Dataset\n",
        "from rectools.metrics import (\n",
        "    Precision,\n",
        "    NDCG,\n",
        "    IntraListDiversity,\n",
        "    Serendipity,\n",
        "    calc_metrics,\n",
        ")\n",
        "from rectools.metrics.distances import PairwiseHammingDistanceCalculator\n",
        "from rectools.models import LightFMWrapperModel\n",
        "from lightfm import LightFM\n",
        "\n",
        "# 1️⃣ Загрузка данных\n",
        "ratings = pd.read_csv(\n",
        "    \"ratings.dat\",\n",
        "    sep=\"::\",\n",
        "    engine=\"python\",\n",
        "    header=None,\n",
        "    names=[Columns.User, Columns.Item, Columns.Weight, Columns.Datetime],\n",
        ")\n",
        "\n",
        "ratings[\"datetime\"] = pd.to_datetime(ratings[\"datetime\"] * 10 ** 9)\n",
        "\n",
        "movies = pd.read_csv(\n",
        "    \"movies.dat\",\n",
        "    sep=\"::\",\n",
        "    engine=\"python\",\n",
        "    header=None,\n",
        "    names=[Columns.Item, \"title\", \"genres\"],\n",
        "    encoding_errors=\"ignore\",\n",
        ")\n",
        "\n",
        "# 2️⃣ ДЕТЕКЦИЯ БОТОВ\n",
        "\n",
        "# 2.1 Определение аномальной активности пользователей\n",
        "user_activity = ratings.groupby(Columns.User).agg(\n",
        "    num_ratings=(Columns.Item, \"count\"),  # Количество оценок\n",
        "    unique_movies=(Columns.Item, \"nunique\"),  # Уникальные фильмы\n",
        "    rating_std=(Columns.Weight, \"std\"),  # Разброс оценок\n",
        "    first_rating_time=(\"datetime\", \"min\"),\n",
        "    last_rating_time=(\"datetime\", \"max\")\n",
        ")\n",
        "\n",
        "# 2.2 Добавляем скорость оценивания (оценки в день)\n",
        "user_activity[\"rating_speed\"] = user_activity[\"num_ratings\"] / (\n",
        "    (user_activity[\"last_rating_time\"] - user_activity[\"first_rating_time\"]).dt.days + 1\n",
        ")\n",
        "\n",
        "# 2.3 Удаление ботов по критериям:\n",
        "thresholds = {\n",
        "    \"num_ratings\": user_activity[\"num_ratings\"].quantile(0.99),  # > 99-го перцентиля\n",
        "    \"unique_movies\": user_activity[\"unique_movies\"].quantile(0.01),  # < 1-го перцентиля\n",
        "    \"rating_std\": 0.1,  # Нет разброса в оценках\n",
        "    \"rating_speed\": user_activity[\"rating_speed\"].quantile(0.99),  # > 99-го перцентиля\n",
        "}\n",
        "\n",
        "bots = user_activity[\n",
        "    (user_activity[\"num_ratings\"] > thresholds[\"num_ratings\"]) |\n",
        "    (user_activity[\"unique_movies\"] < thresholds[\"unique_movies\"]) |\n",
        "    (user_activity[\"rating_std\"] < thresholds[\"rating_std\"]) |\n",
        "    (user_activity[\"rating_speed\"] > thresholds[\"rating_speed\"])\n",
        "]\n",
        "\n",
        "# Удаляем ботов\n",
        "ratings_cleaned = ratings[~ratings[Columns.User].isin(bots.index)]\n",
        "print(f\"Удалено {len(bots)} подозрительных пользователей.\")\n",
        "\n",
        "# 3️⃣ Деление на train/test\n",
        "split_dt = pd.Timestamp(\"2003-02-01\")\n",
        "df_train = ratings_cleaned.loc[ratings_cleaned[\"datetime\"] < split_dt]\n",
        "df_test = ratings_cleaned.loc[ratings_cleaned[\"datetime\"] >= split_dt]\n",
        "\n",
        "# 4️⃣ Обучение LightFM\n",
        "dataset = Dataset.construct(df_train)\n",
        "\n",
        "model = LightFMWrapperModel(\n",
        "    model=LightFM(no_components=30)\n",
        ")\n",
        "model.fit(dataset)\n",
        "\n",
        "# 5️⃣ Генерация рекомендаций\n",
        "recos = model.recommend(\n",
        "    users=df_test[Columns.User].unique(),\n",
        "    dataset=dataset,\n",
        "    k=5,\n",
        "    filter_viewed=True,\n",
        ")\n",
        "\n",
        "# 6️⃣ Оценка метрик\n",
        "serendipity = Serendipity(k=10)\n",
        "precision = Precision(k=10, r_precision=True)\n",
        "ndcg = NDCG(k=10, log_base=3)\n",
        "\n",
        "movies[\"genre\"] = movies[\"genres\"].str.split(\"|\")\n",
        "genre_exploded = movies[[Columns.Item, \"genre\"]].set_index(Columns.Item).explode(\"genre\")\n",
        "genre_dummies = pd.get_dummies(genre_exploded, prefix=\"\", prefix_sep=\"\").groupby(Columns.Item).sum()\n",
        "\n",
        "precision_value = precision.calc(reco=recos, interactions=df_test)\n",
        "print(f\"precision: {precision_value}\")\n",
        "\n",
        "catalog = df_train[Columns.Item].unique()\n",
        "serendipity_value = serendipity.calc(\n",
        "    reco=recos,\n",
        "    interactions=df_test,\n",
        "    prev_interactions=df_train,\n",
        "    catalog=catalog\n",
        ")\n",
        "print(\"Serendipity: \", serendipity_value)\n",
        "\n",
        "print(\"NDCG: \", ndcg.calc(reco=recos, interactions=df_test))\n",
        "\n",
        "distance_calculator = PairwiseHammingDistanceCalculator(genre_dummies)\n",
        "ild = IntraListDiversity(k=10, distance_calculator=distance_calculator)\n",
        "print(\"ILD: \", ild.calc(reco=recos))\n"
      ],
      "metadata": {
        "id": "-uNXsWUD1TxJ"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [],
      "metadata": {
        "id": "H2aMMUEh1me4"
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "# NLP NIR Natasha"
      ],
      "metadata": {
        "id": "YLFxW6OTmkSz"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "!pip install natasha"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "SDr6wjRTmtVG",
        "outputId": "4fbf4295-a7ba-4682-8198-7f8236b4c262"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Collecting natasha\n",
            "  Downloading natasha-1.6.0-py3-none-any.whl.metadata (23 kB)\n",
            "Collecting pymorphy2 (from natasha)\n",
            "  Downloading pymorphy2-0.9.1-py3-none-any.whl.metadata (3.6 kB)\n",
            "Collecting razdel>=0.5.0 (from natasha)\n",
            "  Downloading razdel-0.5.0-py3-none-any.whl.metadata (10.0 kB)\n",
            "Collecting navec>=0.9.0 (from natasha)\n",
            "  Downloading navec-0.10.0-py3-none-any.whl.metadata (21 kB)\n",
            "Collecting slovnet>=0.6.0 (from natasha)\n",
            "  Downloading slovnet-0.6.0-py3-none-any.whl.metadata (34 kB)\n",
            "Collecting yargy>=0.16.0 (from natasha)\n",
            "  Downloading yargy-0.16.0-py3-none-any.whl.metadata (3.5 kB)\n",
            "Collecting ipymarkup>=0.8.0 (from natasha)\n",
            "  Downloading ipymarkup-0.9.0-py3-none-any.whl.metadata (5.6 kB)\n",
            "Collecting intervaltree>=3 (from ipymarkup>=0.8.0->natasha)\n",
            "  Downloading intervaltree-3.1.0.tar.gz (32 kB)\n",
            "  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "Requirement already satisfied: numpy in /usr/local/lib/python3.11/dist-packages (from navec>=0.9.0->natasha) (1.26.4)\n",
            "Collecting dawg-python>=0.7.1 (from pymorphy2->natasha)\n",
            "  Downloading DAWG_Python-0.7.2-py2.py3-none-any.whl.metadata (7.0 kB)\n",
            "Collecting pymorphy2-dicts-ru<3.0,>=2.4 (from pymorphy2->natasha)\n",
            "  Downloading pymorphy2_dicts_ru-2.4.417127.4579844-py2.py3-none-any.whl.metadata (2.1 kB)\n",
            "Collecting docopt>=0.6 (from pymorphy2->natasha)\n",
            "  Downloading docopt-0.6.2.tar.gz (25 kB)\n",
            "  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "Requirement already satisfied: sortedcontainers<3.0,>=2.0 in /usr/local/lib/python3.11/dist-packages (from intervaltree>=3->ipymarkup>=0.8.0->natasha) (2.4.0)\n",
            "Downloading natasha-1.6.0-py3-none-any.whl (34.4 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m34.4/34.4 MB\u001b[0m \u001b[31m32.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading ipymarkup-0.9.0-py3-none-any.whl (14 kB)\n",
            "Downloading navec-0.10.0-py3-none-any.whl (23 kB)\n",
            "Downloading razdel-0.5.0-py3-none-any.whl (21 kB)\n",
            "Downloading slovnet-0.6.0-py3-none-any.whl (46 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m46.7/46.7 kB\u001b[0m \u001b[31m3.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading yargy-0.16.0-py3-none-any.whl (33 kB)\n",
            "Downloading pymorphy2-0.9.1-py3-none-any.whl (55 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m55.5/55.5 kB\u001b[0m \u001b[31m3.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading DAWG_Python-0.7.2-py2.py3-none-any.whl (11 kB)\n",
            "Downloading pymorphy2_dicts_ru-2.4.417127.4579844-py2.py3-none-any.whl (8.2 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8.2/8.2 MB\u001b[0m \u001b[31m23.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hBuilding wheels for collected packages: docopt, intervaltree\n",
            "  Building wheel for docopt (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for docopt: filename=docopt-0.6.2-py2.py3-none-any.whl size=13706 sha256=1c63601457de054370a39a15b73fae84c828da1412d67838ceaca15fd87d8aec\n",
            "  Stored in directory: /root/.cache/pip/wheels/1a/b0/8c/4b75c4116c31f83c8f9f047231251e13cc74481cca4a78a9ce\n",
            "  Building wheel for intervaltree (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for intervaltree: filename=intervaltree-3.1.0-py2.py3-none-any.whl size=26097 sha256=4f8268b1c102f6a686fac1ad2b6f5e31d2e0c2c5f6fe16af2f6177850adac975\n",
            "  Stored in directory: /root/.cache/pip/wheels/31/d7/d9/eec6891f78cac19a693bd40ecb8365d2f4613318c145ec9816\n",
            "Successfully built docopt intervaltree\n",
            "Installing collected packages: razdel, pymorphy2-dicts-ru, docopt, dawg-python, pymorphy2, navec, intervaltree, yargy, slovnet, ipymarkup, natasha\n",
            "Successfully installed dawg-python-0.7.2 docopt-0.6.2 intervaltree-3.1.0 ipymarkup-0.9.0 natasha-1.6.0 navec-0.10.0 pymorphy2-0.9.1 pymorphy2-dicts-ru-2.4.417127.4579844 razdel-0.5.0 slovnet-0.6.0 yargy-0.16.0\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "import pandas as pd\n",
        "from natasha import (\n",
        "    Doc,\n",
        "    Segmenter,\n",
        "    MorphVocab,\n",
        "    NewsEmbedding,\n",
        "    NewsMorphTagger,\n",
        "    NewsSyntaxParser,\n",
        "    NewsNERTagger,\n",
        ")\n",
        "\n",
        "# Инициализация компонентов Natasha\n",
        "segmenter = Segmenter()\n",
        "morph_vocab = MorphVocab()\n",
        "emb = NewsEmbedding()\n",
        "morph_tagger = NewsMorphTagger(emb)\n",
        "syntax_parser = NewsSyntaxParser(emb)\n",
        "ner_tagger = NewsNERTagger(emb)\n",
        "\n",
        "# Функция для обработки предложения и извлечения сущностей\n",
        "def extract_entities(sentence, sentence_id):\n",
        "    doc = Doc(sentence)\n",
        "    doc.segment(segmenter)\n",
        "    doc.tag_morph(morph_tagger)\n",
        "    doc.tag_ner(ner_tagger)\n",
        "\n",
        "    entities = []\n",
        "    for span in doc.spans:\n",
        "        entities.append({\n",
        "            'sentence_id': sentence_id,\n",
        "            'entity': span.text,\n",
        "            'entity_type': span.type\n",
        "        })\n",
        "    return entities\n",
        "\n",
        "# Пример предложения\n",
        "sentence = \"Бурятия и Забайкальский край переданы из Сибирского федерального округа (СФО) в состав Дальневосточного (ДФО). Соответствующий указ подписал президент Владимир Путин, документ опубликован на официальном интернет-портале правовой информации. Этим же указом глава государства поручил руководителю своей администрации утвердить структуру и штатную численность аппаратов полномочных представителей президента в этих двух округах. После исключения Бурятии и Забайкалья в составе СФО остались десять регионов: Алтай, Алтайский край, Иркутская, Кемеровская, Новосибирская, Омская и Томская области, Красноярский край, Тува и Хакасия. Действующим полпредом президента в этом округе является бывший губернатор Севастополя, экс-заместитель командующего Черноморским флотом России Сергей Меняйло. В составе ДФО отныне 11 субъектов. Помимо Бурятии и Забайкалья, это Камчатский, Приморский и Хабаровский края, Амурская, Еврейская автономная, Магаданская и Сахалинская области, а также Якутия и Чукотка. Дальневосточное полпредство возглавляет Юрий Трутнев, совмещающий эту должность с постом вице-премьера в правительстве России. Федеральные округа были созданы в мае 2000 года в соответствии с указом президента Путина.\"\n",
        "sentence_id = 1  # ID предложения\n",
        "\n",
        "# Извлечение сущностей\n",
        "entities = extract_entities(sentence, sentence_id)\n",
        "\n",
        "# Создание DataFrame\n",
        "df = pd.DataFrame(entities)\n",
        "\n",
        "# Вывод результата\n",
        "df.head(20)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 677
        },
        "id": "wK8KTIjdpi4A",
        "outputId": "f4364c32-d6c0-4ecc-d1ec-89cebcaa48fb"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "    sentence_id                                entity entity_type\n",
              "0             1                               Бурятия         LOC\n",
              "1             1                    Забайкальский край         LOC\n",
              "2             1  Сибирского федерального округа (СФО)         LOC\n",
              "3             1                Дальневосточного (ДФО)         LOC\n",
              "4             1                        Владимир Путин         PER\n",
              "5             1                               Бурятии         LOC\n",
              "6             1                            Забайкалья         LOC\n",
              "7             1                                   СФО         LOC\n",
              "8             1                                 Алтай         LOC\n",
              "9             1                        Алтайский край         LOC\n",
              "10            1                             Иркутская         LOC\n",
              "11            1                           Кемеровская         LOC\n",
              "12            1                         Новосибирская         LOC\n",
              "13            1                                Омская         LOC\n",
              "14            1                       Томская области         LOC\n",
              "15            1                     Красноярский край         LOC\n",
              "16            1                                  Тува         LOC\n",
              "17            1                               Хакасия         LOC\n",
              "18            1                           Севастополя         LOC\n",
              "19            1                   Черноморским флотом         ORG"
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-5cf7ac1e-d121-4fdc-8190-702fa17adfc7\" class=\"colab-df-container\">\n",
              "    <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>sentence_id</th>\n",
              "      <th>entity</th>\n",
              "      <th>entity_type</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>1</td>\n",
              "      <td>Бурятия</td>\n",
              "      <td>LOC</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>1</td>\n",
              "      <td>Забайкальский край</td>\n",
              "      <td>LOC</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>1</td>\n",
              "      <td>Сибирского федерального округа (СФО)</td>\n",
              "      <td>LOC</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>1</td>\n",
              "      <td>Дальневосточного (ДФО)</td>\n",
              "      <td>LOC</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>1</td>\n",
              "      <td>Владимир Путин</td>\n",
              "      <td>PER</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>5</th>\n",
              "      <td>1</td>\n",
              "      <td>Бурятии</td>\n",
              "      <td>LOC</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>6</th>\n",
              "      <td>1</td>\n",
              "      <td>Забайкалья</td>\n",
              "      <td>LOC</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>7</th>\n",
              "      <td>1</td>\n",
              "      <td>СФО</td>\n",
              "      <td>LOC</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>8</th>\n",
              "      <td>1</td>\n",
              "      <td>Алтай</td>\n",
              "      <td>LOC</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>9</th>\n",
              "      <td>1</td>\n",
              "      <td>Алтайский край</td>\n",
              "      <td>LOC</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>10</th>\n",
              "      <td>1</td>\n",
              "      <td>Иркутская</td>\n",
              "      <td>LOC</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>11</th>\n",
              "      <td>1</td>\n",
              "      <td>Кемеровская</td>\n",
              "      <td>LOC</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>12</th>\n",
              "      <td>1</td>\n",
              "      <td>Новосибирская</td>\n",
              "      <td>LOC</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>13</th>\n",
              "      <td>1</td>\n",
              "      <td>Омская</td>\n",
              "      <td>LOC</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>14</th>\n",
              "      <td>1</td>\n",
              "      <td>Томская области</td>\n",
              "      <td>LOC</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>15</th>\n",
              "      <td>1</td>\n",
              "      <td>Красноярский край</td>\n",
              "      <td>LOC</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>16</th>\n",
              "      <td>1</td>\n",
              "      <td>Тува</td>\n",
              "      <td>LOC</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>17</th>\n",
              "      <td>1</td>\n",
              "      <td>Хакасия</td>\n",
              "      <td>LOC</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>18</th>\n",
              "      <td>1</td>\n",
              "      <td>Севастополя</td>\n",
              "      <td>LOC</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>19</th>\n",
              "      <td>1</td>\n",
              "      <td>Черноморским флотом</td>\n",
              "      <td>ORG</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "    <div class=\"colab-df-buttons\">\n",
              "\n",
              "  <div class=\"colab-df-container\">\n",
              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-5cf7ac1e-d121-4fdc-8190-702fa17adfc7')\"\n",
              "            title=\"Convert this dataframe to an interactive table.\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
              "  </svg>\n",
              "    </button>\n",
              "\n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    .colab-df-buttons div {\n",
              "      margin-bottom: 4px;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "    <script>\n",
              "      const buttonEl =\n",
              "        document.querySelector('#df-5cf7ac1e-d121-4fdc-8190-702fa17adfc7 button.colab-df-convert');\n",
              "      buttonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "      async function convertToInteractive(key) {\n",
              "        const element = document.querySelector('#df-5cf7ac1e-d121-4fdc-8190-702fa17adfc7');\n",
              "        const dataTable =\n",
              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                    [key], {});\n",
              "        if (!dataTable) return;\n",
              "\n",
              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "          + ' to learn more about interactive tables.';\n",
              "        element.innerHTML = '';\n",
              "        dataTable['output_type'] = 'display_data';\n",
              "        await google.colab.output.renderOutput(dataTable, element);\n",
              "        const docLink = document.createElement('div');\n",
              "        docLink.innerHTML = docLinkHtml;\n",
              "        element.appendChild(docLink);\n",
              "      }\n",
              "    </script>\n",
              "  </div>\n",
              "\n",
              "\n",
              "<div id=\"df-4e91bba0-4203-4c2b-b7d3-01b72b192a2c\">\n",
              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-4e91bba0-4203-4c2b-b7d3-01b72b192a2c')\"\n",
              "            title=\"Suggest charts\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "     width=\"24px\">\n",
              "    <g>\n",
              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
              "    </g>\n",
              "</svg>\n",
              "  </button>\n",
              "\n",
              "<style>\n",
              "  .colab-df-quickchart {\n",
              "      --bg-color: #E8F0FE;\n",
              "      --fill-color: #1967D2;\n",
              "      --hover-bg-color: #E2EBFA;\n",
              "      --hover-fill-color: #174EA6;\n",
              "      --disabled-fill-color: #AAA;\n",
              "      --disabled-bg-color: #DDD;\n",
              "  }\n",
              "\n",
              "  [theme=dark] .colab-df-quickchart {\n",
              "      --bg-color: #3B4455;\n",
              "      --fill-color: #D2E3FC;\n",
              "      --hover-bg-color: #434B5C;\n",
              "      --hover-fill-color: #FFFFFF;\n",
              "      --disabled-bg-color: #3B4455;\n",
              "      --disabled-fill-color: #666;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart {\n",
              "    background-color: var(--bg-color);\n",
              "    border: none;\n",
              "    border-radius: 50%;\n",
              "    cursor: pointer;\n",
              "    display: none;\n",
              "    fill: var(--fill-color);\n",
              "    height: 32px;\n",
              "    padding: 0;\n",
              "    width: 32px;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart:hover {\n",
              "    background-color: var(--hover-bg-color);\n",
              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "    fill: var(--button-hover-fill-color);\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart-complete:disabled,\n",
              "  .colab-df-quickchart-complete:disabled:hover {\n",
              "    background-color: var(--disabled-bg-color);\n",
              "    fill: var(--disabled-fill-color);\n",
              "    box-shadow: none;\n",
              "  }\n",
              "\n",
              "  .colab-df-spinner {\n",
              "    border: 2px solid var(--fill-color);\n",
              "    border-color: transparent;\n",
              "    border-bottom-color: var(--fill-color);\n",
              "    animation:\n",
              "      spin 1s steps(1) infinite;\n",
              "  }\n",
              "\n",
              "  @keyframes spin {\n",
              "    0% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "      border-left-color: var(--fill-color);\n",
              "    }\n",
              "    20% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    30% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    40% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    60% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    80% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "    90% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "  }\n",
              "</style>\n",
              "\n",
              "  <script>\n",
              "    async function quickchart(key) {\n",
              "      const quickchartButtonEl =\n",
              "        document.querySelector('#' + key + ' button');\n",
              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
              "      try {\n",
              "        const charts = await google.colab.kernel.invokeFunction(\n",
              "            'suggestCharts', [key], {});\n",
              "      } catch (error) {\n",
              "        console.error('Error during call to suggestCharts:', error);\n",
              "      }\n",
              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
              "    }\n",
              "    (() => {\n",
              "      let quickchartButtonEl =\n",
              "        document.querySelector('#df-4e91bba0-4203-4c2b-b7d3-01b72b192a2c button');\n",
              "      quickchartButtonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "    })();\n",
              "  </script>\n",
              "</div>\n",
              "    </div>\n",
              "  </div>\n"
            ],
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "dataframe",
              "variable_name": "df",
              "summary": "{\n  \"name\": \"df\",\n  \"rows\": 37,\n  \"fields\": [\n    {\n      \"column\": \"sentence_id\",\n      \"properties\": {\n        \"dtype\": \"number\",\n        \"std\": 0,\n        \"min\": 1,\n        \"max\": 1,\n        \"num_unique_values\": 1,\n        \"samples\": [\n          1\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"entity\",\n      \"properties\": {\n        \"dtype\": \"string\",\n        \"num_unique_values\": 34,\n        \"samples\": [\n          \"\\u041a\\u0440\\u0430\\u0441\\u043d\\u043e\\u044f\\u0440\\u0441\\u043a\\u0438\\u0439 \\u043a\\u0440\\u0430\\u0439\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"entity_type\",\n      \"properties\": {\n        \"dtype\": \"category\",\n        \"num_unique_values\": 3,\n        \"samples\": [\n          \"LOC\"\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"
            }
          },
          "metadata": {},
          "execution_count": 25
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "# Обработка + Word2Vec + TF-IDF"
      ],
      "metadata": {
        "id": "He3sYIMuYHdj"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "!pip install pymorphy3"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "L4drvZGJZDma",
        "outputId": "325b7fbc-ebe3-4c7b-8790-298ad296d167"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Collecting pymorphy3\n",
            "  Downloading pymorphy3-2.0.3-py3-none-any.whl.metadata (1.9 kB)\n",
            "Collecting dawg2-python>=0.8.0 (from pymorphy3)\n",
            "  Downloading dawg2_python-0.9.0-py3-none-any.whl.metadata (7.5 kB)\n",
            "Collecting pymorphy3-dicts-ru (from pymorphy3)\n",
            "  Downloading pymorphy3_dicts_ru-2.4.417150.4580142-py2.py3-none-any.whl.metadata (2.0 kB)\n",
            "Downloading pymorphy3-2.0.3-py3-none-any.whl (53 kB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m53.8/53.8 kB\u001b[0m \u001b[31m2.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hDownloading dawg2_python-0.9.0-py3-none-any.whl (9.3 kB)\n",
            "Downloading pymorphy3_dicts_ru-2.4.417150.4580142-py2.py3-none-any.whl (8.4 MB)\n",
            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8.4/8.4 MB\u001b[0m \u001b[31m45.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
            "\u001b[?25hInstalling collected packages: pymorphy3-dicts-ru, dawg2-python, pymorphy3\n",
            "Successfully installed dawg2-python-0.9.0 pymorphy3-2.0.3 pymorphy3-dicts-ru-2.4.417150.4580142\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "categories = {\n",
        "    \"Жизнь человека\": {\n",
        "        \"description\": \"Задания о ценности человеческой жизни, донорстве, осведомленности о заболеваниях\",\n",
        "        \"examples\": [\n",
        "            \"организация донорской акции\",\n",
        "            \"переливание крови\",\n",
        "            \"медицинское просвещение\",\n",
        "            \"профилактика заболеваний\",\n",
        "            \"первая помощь\"\n",
        "        ]\n",
        "    },\n",
        "    \"Достоинство человека\": {\n",
        "        \"description\": \"Задания об уважении к профессиям и людям разных социальных статусов\",\n",
        "        \"examples\": [\n",
        "            \"мастер-класс о уважении\",\n",
        "            \"социальный статус\",\n",
        "            \"профессиональная этика\",\n",
        "            \"равенство возможностей\",\n",
        "            \"толерантность\",\n",
        "            \"\"\n",
        "        ]\n",
        "    },\n",
        "    \"Права и свободы человека\": {\n",
        "        \"description\": \"Изучение и защита прав человека\",\n",
        "        \"examples\": [\n",
        "            \"тест о правах человека\",\n",
        "            \"конвенция о правах ребенка\",\n",
        "            \"правовая грамотность\",\n",
        "            \"защита свобод\",\n",
        "            \"гражданские права\"\n",
        "        ]\n",
        "    },\n",
        "    \"Патриотизм\": {\n",
        "        \"description\": \"История государства, предки, патриотические организации\",\n",
        "        \"examples\": [\n",
        "            \"Россия\",\n",
        "            \"Российская федерация\",\n",
        "            \"СССР\"\n",
        "            \"посещение военного музея\",\n",
        "            \"волонтерство 9 мая\",\n",
        "            \"история великой отечественной\",\n",
        "            \"патриотический флешмоб\",\n",
        "            \"герои россии\"\n",
        "        ]\n",
        "    },\n",
        "    \"Гражданственность\": {\n",
        "        \"description\": \"Процветание общества через личное участие\",\n",
        "        \"examples\": [\n",
        "            \"субботник\",\n",
        "            \"экологическая акция\",\n",
        "            \"гражданские инициативы\",\n",
        "            \"благоустройство города\",\n",
        "            \"общественный контроль\"\n",
        "        ]\n",
        "    },\n",
        "    \"Служение Отечеству и ответственность за его судьбу\": {\n",
        "        \"description\": \"История отечества и физическая подготовка граждан\",\n",
        "        \"examples\": [\n",
        "            \"военно-спортивные игры\",\n",
        "            \"уроки мужества\",\n",
        "            \"здоровье нации\",\n",
        "            \"историческая реконструкция\",\n",
        "            \"вахта памяти\"\n",
        "        ]\n",
        "    },\n",
        "    \"Высокие нравственные идеалы\": {\n",
        "        \"description\": \"Культура, идеи и творчество, формирующие мораль\",\n",
        "        \"examples\": [\n",
        "            \"обсуждение классической литературы\",\n",
        "            \"нравственные дилеммы\",\n",
        "            \"этические нормы\",\n",
        "            \"моральный выбор\",\n",
        "            \"духовные ценности\",\n",
        "            \"история\",\n",
        "            \"страны\"\n",
        "        ]\n",
        "    },\n",
        "    \"Крепкая семья\": {\n",
        "        \"description\": \"Совместная семейная деятельность и отношения\",\n",
        "        \"examples\": [\n",
        "            \"семейный квест\",\n",
        "            \"генеалогическое древо\",\n",
        "            \"совместный пикник\",\n",
        "            \"семейные традиции\",\n",
        "            \"родительский день\"\n",
        "        ]\n",
        "    },\n",
        "    \"Созидательный труд\": {\n",
        "        \"description\": \"Обучение практическим навыкам и физическая помощь\",\n",
        "        \"examples\": [\n",
        "            \"мастер-класс по ремеслам\",\n",
        "            \"помощь пожилым\",\n",
        "            \"трудовой десант\",\n",
        "            \"профессиональные пробы\",\n",
        "            \"социальный труд\"\n",
        "        ]\n",
        "    },\n",
        "    \"Приоритет духовного над материальным\": {\n",
        "        \"description\": \"Помощь нуждающимся вместо материальных благ\",\n",
        "        \"examples\": [\n",
        "            \"благотворительная акция\",\n",
        "            \"помощь приюту\",\n",
        "            \"духовные практики\",\n",
        "            \"волонтерство в храме\",\n",
        "            \"нематериальные ценности\"\n",
        "        ]\n",
        "    },\n",
        "    \"Гуманизм\": {\n",
        "        \"description\": \"Помощь людям и добрые дела\",\n",
        "        \"examples\": [\n",
        "            \"волонтерство в приюте\",\n",
        "            \"помощь бездомным\",\n",
        "            \"добрые письма\",\n",
        "            \"поддержка инвалидов\",\n",
        "            \"человеколюбие\"\n",
        "        ]\n",
        "    },\n",
        "    \"Милосердие\": {\n",
        "        \"description\": \"Помощь окружающим и информирование о болезнях\",\n",
        "        \"examples\": [\n",
        "            \"сбор вещей нуждающимся\",\n",
        "            \"помощь пожилым\",\n",
        "            \"донорство органов\",\n",
        "            \"паллиативная помощь\",\n",
        "            \"социальная поддержка\"\n",
        "        ]\n",
        "    },\n",
        "    \"Справедливость\": {\n",
        "        \"description\": \"Обсуждение и укрепление справедливости\",\n",
        "        \"examples\": [\n",
        "            \"дебаты о равенстве\",\n",
        "            \"правовые квесты\",\n",
        "            \"честное распределение\",\n",
        "            \"борьба с дискриминацией\",\n",
        "            \"равные возможности\"\n",
        "        ]\n",
        "    },\n",
        "    \"Коллективизм\": {\n",
        "        \"description\": \"Командная работа и взаимодействие в группе\",\n",
        "        \"examples\": [\n",
        "            \"групповой турпоход\",\n",
        "            \"командный квиз\",\n",
        "            \"совместный проект\",\n",
        "            \"работа в команде\",\n",
        "            \"коллективное решение\"\n",
        "        ]\n",
        "    },\n",
        "    \"Взаимопомощь и взаимоуважение\": {\n",
        "        \"description\": \"Волонтерство и поддержка окружающих\",\n",
        "        \"examples\": [\n",
        "            \"помощь одноклассникам\",\n",
        "            \"тьюторство\",\n",
        "            \"шефская помощь\",\n",
        "            \"социальное наставничество\",\n",
        "            \"взаимная поддержка\"\n",
        "        ]\n",
        "    },\n",
        "    \"Историческая память и преемственность поколений\": {\n",
        "        \"description\": \"История семьи и страны, связь поколений\",\n",
        "        \"examples\": [\n",
        "            \"интервью с ветеранами\",\n",
        "            \"семейная летопись\",\n",
        "            \"музей истории\",\n",
        "            \"реконструкция событий\",\n",
        "            \"устная история\"\n",
        "        ]\n",
        "    },\n",
        "    \"Единство народов России\": {\n",
        "        \"description\": \"Культурное многообразие и национальные традиции\",\n",
        "        \"examples\": [\n",
        "            \"фестиваль культур\",\n",
        "            \"этнографический музей\",\n",
        "            \"национальные ремесла\",\n",
        "            \"межнациональный диалог\",\n",
        "            \"традиционные обычаи\"\n",
        "        ]\n",
        "    }\n",
        "}"
      ],
      "metadata": {
        "id": "97It6DvYZYOP"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "import pandas as pd\n",
        "import numpy as np\n",
        "import re\n",
        "from string import punctuation\n",
        "import pymorphy3\n",
        "from sklearn.feature_extraction.text import TfidfVectorizer\n",
        "from sklearn.metrics.pairwise import cosine_similarity\n",
        "import nltk\n",
        "from nltk.corpus import stopwords\n",
        "\n",
        "nltk.download('stopwords')\n",
        "\n",
        "morph = pymorphy3.MorphAnalyzer()\n",
        "russian_stopwords = stopwords.words('russian')\n",
        "\n",
        "def preprocess(text):\n",
        "    text = text.lower()\n",
        "    text = re.sub(f'[{punctuation}»«–…№©™•°]', '', text)\n",
        "    words = re.findall(r'\\b[a-zа-яё]+\\b', text)\n",
        "    lemmas = []\n",
        "    for word in words:\n",
        "        if word not in russian_stopwords and len(word) > 2:\n",
        "            lemma = morph.parse(word)[0].normal_form\n",
        "            lemmas.append(lemma)\n",
        "    return ' '.join(lemmas)\n",
        "\n",
        "# Создание корпуса документов\n",
        "category_docs = []\n",
        "for cat, data_s in categories.items():\n",
        "    doc = f\"{cat} {data_s['description']} {' '.join(data_s['examples'])}\"\n",
        "    category_docs.append(doc)\n",
        "\n",
        "# Инициализация TF-IDF\n",
        "vectorizer = TfidfVectorizer(tokenizer=preprocess)\n",
        "tfidf_matrix = vectorizer.fit_transform(category_docs)\n",
        "\n",
        "def classify_text(text):\n",
        "    processed_text = preprocess(text)\n",
        "\n",
        "    text_vector = vectorizer.transform([processed_text])\n",
        "    print(text_vector)\n",
        "    similarities = cosine_similarity(text_vector, tfidf_matrix)\n",
        "    print(similarities)\n",
        "    best_match_idx = similarities.argmax()\n",
        "    best_category = list(categories.keys())[best_match_idx]\n",
        "    accuracy = similarities[0][best_match_idx]\n",
        "\n",
        "\n",
        "    return best_category, accuracy"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "MHBcm352YSBS",
        "outputId": "95184d17-ba84-43e4-8128-47b4e7160cbb"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "[nltk_data] Downloading package stopwords to /root/nltk_data...\n",
            "[nltk_data]   Package stopwords is already up-to-date!\n",
            "/usr/local/lib/python3.11/dist-packages/sklearn/feature_extraction/text.py:517: UserWarning: The parameter 'token_pattern' will not be used since 'tokenizer' is not None'\n",
            "  warnings.warn(\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "input_text = \"Организация группового турпохода с одноклассниками и работа в команде\"\n",
        "result = classify_text(input_text)\n",
        "result"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 408
        },
        "id": "-MdsShmMZPJV",
        "outputId": "6e4e1588-2856-499d-9167-b3ee1805a692"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "error",
          "ename": "ValueError",
          "evalue": "setting an array element with a sequence.",
          "traceback": [
            "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
            "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
            "\u001b[0;31mTypeError\u001b[0m: float() argument must be a string or a real number, not 'csr_matrix'",
            "\nThe above exception was the direct cause of the following exception:\n",
            "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
            "\u001b[0;32m<ipython-input-35-0374233296a0>\u001b[0m in \u001b[0;36m<cell line: 0>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0minput_text\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m\"Организация группового турпохода с одноклассниками и работа в команде\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mclassify_text_x\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minput_text\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      3\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m<ipython-input-32-f3d2d04f2468>\u001b[0m in \u001b[0;36mclassify_text_x\u001b[0;34m(text)\u001b[0m\n\u001b[1;32m     46\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     47\u001b[0m     \u001b[0;31m# Считаем сходство с каждым классом\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 48\u001b[0;31m     similarities = {cat: cosine_similarity(text_vector.reshape(1, -1), cat_vector.reshape(1, -1))[0][0] \n\u001b[0m\u001b[1;32m     49\u001b[0m                     for cat, cat_vector in category_vectors.items()}\n\u001b[1;32m     50\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m<ipython-input-32-f3d2d04f2468>\u001b[0m in \u001b[0;36m<dictcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m     46\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     47\u001b[0m     \u001b[0;31m# Считаем сходство с каждым классом\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 48\u001b[0;31m     similarities = {cat: cosine_similarity(text_vector.reshape(1, -1), cat_vector.reshape(1, -1))[0][0] \n\u001b[0m\u001b[1;32m     49\u001b[0m                     for cat, cat_vector in category_vectors.items()}\n\u001b[1;32m     50\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/sklearn/utils/_param_validation.py\u001b[0m in \u001b[0;36mwrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    214\u001b[0m                     )\n\u001b[1;32m    215\u001b[0m                 ):\n\u001b[0;32m--> 216\u001b[0;31m                     \u001b[0;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    217\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mInvalidParameterError\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    218\u001b[0m                 \u001b[0;31m# When the function is just a wrapper around an estimator, we allow\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/sklearn/metrics/pairwise.py\u001b[0m in \u001b[0;36mcosine_similarity\u001b[0;34m(X, Y, dense_output)\u001b[0m\n\u001b[1;32m   1739\u001b[0m     \u001b[0;31m# to avoid recursive import\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1740\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1741\u001b[0;31m     \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mY\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcheck_pairwise_arrays\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mY\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1742\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1743\u001b[0m     \u001b[0mX_normalized\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnormalize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/sklearn/metrics/pairwise.py\u001b[0m in \u001b[0;36mcheck_pairwise_arrays\u001b[0;34m(X, Y, precomputed, dtype, accept_sparse, force_all_finite, ensure_all_finite, ensure_2d, copy)\u001b[0m\n\u001b[1;32m    198\u001b[0m         )\n\u001b[1;32m    199\u001b[0m     \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 200\u001b[0;31m         X = check_array(\n\u001b[0m\u001b[1;32m    201\u001b[0m             \u001b[0mX\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    202\u001b[0m             \u001b[0maccept_sparse\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0maccept_sparse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/sklearn/utils/validation.py\u001b[0m in \u001b[0;36mcheck_array\u001b[0;34m(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_writeable, force_all_finite, ensure_all_finite, ensure_non_negative, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator, input_name)\u001b[0m\n\u001b[1;32m   1053\u001b[0m                     \u001b[0marray\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mxp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1054\u001b[0m                 \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1055\u001b[0;31m                     \u001b[0marray\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_asarray_with_order\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0morder\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0morder\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mxp\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mxp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1056\u001b[0m             \u001b[0;32mexcept\u001b[0m \u001b[0mComplexWarning\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mcomplex_warning\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1057\u001b[0m                 raise ValueError(\n",
            "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/sklearn/utils/_array_api.py\u001b[0m in \u001b[0;36m_asarray_with_order\u001b[0;34m(array, dtype, order, copy, xp, device)\u001b[0m\n\u001b[1;32m    837\u001b[0m             \u001b[0marray\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnumpy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0morder\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0morder\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    838\u001b[0m         \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 839\u001b[0;31m             \u001b[0marray\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnumpy\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0masarray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marray\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0morder\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0morder\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    840\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    841\u001b[0m         \u001b[0;31m# At this point array is a NumPy ndarray. We convert it to an array\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
            "\u001b[0;31mValueError\u001b[0m: setting an array element with a sequence."
          ]
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "# Определение похожих запросов"
      ],
      "metadata": {
        "id": "GutjrXHHb0_B"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import torch\n",
        "from sentence_transformers import SentenceTransformer\n",
        "from sklearn.metrics.pairwise import cosine_similarity\n",
        "import pandas as pd\n",
        "\n",
        "# ✅ Загружаем предобученную модель, поддерживающую русский язык\n",
        "model = SentenceTransformer(\"sentence-transformers/paraphrase-multilingual-mpnet-base-v2\")\n",
        "\n",
        "# 📌 Пример запросов\n",
        "queries = [\n",
        "    \"Как приготовить борщ?\",\n",
        "    \"Рецепт вкусного борща\",\n",
        "    \"Где найти рецепт лазаньи?\",\n",
        "    \"Как сварить суп?\"\n",
        "]\n",
        "\n",
        "# 🔥 Получаем эмбеддинги\n",
        "query_embeddings = model.encode(queries, convert_to_tensor=True)\n",
        "\n",
        "# 🔥 Вычисляем матрицу косинусного сходства\n",
        "similarity_matrix = cosine_similarity(query_embeddings.cpu().numpy())\n",
        "\n",
        "# 📌 Выводим результаты\n",
        "df = pd.DataFrame(similarity_matrix, index=queries, columns=queries)\n",
        "print(df)\n"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 560,
          "referenced_widgets": [
            "dca4feb8999943458c52402e4821fd9b",
            "962de14c45d440f981cab7f98bb999d4",
            "e1670826b6984372b3e1f6d38dc775ce",
            "3cd0ed60902944d39d3f0c1acd41cdc3",
            "a73eb0974895406f923d3db27483084f",
            "c7645ed1ba1a4401be3523e96b611adb",
            "d5d2baac77dd489c911e851aea84b2e8",
            "de9347d60d8b43888b3603bbfbceb2af",
            "a627519796034c88a763f9cd2ec0808e",
            "7a868d4c9e3f4a70961c2d056d6a5653",
            "b2b43b9c16664528a1c55e66805eff76",
            "d02d64dc3c9a499491861bef90086d82",
            "7aad2011378e434eac5d46c19a117631",
            "208acf230279450ca859204736946a28",
            "641efe28133240ccbcff70a3d4424249",
            "95152a8da2dd4333afeb475aec2b3def",
            "0388f73993794aa3aba737c2ff7a06e7",
            "89414b1d7b06439c8bdb33948be5d11e",
            "d1f4213d8cbe4416a26520c67b8898ef",
            "d3d80ca0e7dd4ec396e96892ec86e17c",
            "8843fcd259744d94979cc460e24f5ba3",
            "0de221a4d2d644c885c0d3f82f8b924c",
            "9ed3dd803a884c52aff984be748b0fa2",
            "d8d8af05c11e4ecfa4c75f9eea5f581a",
            "134b61665b5f42ebaa18710e78e4b37a",
            "8257e6f8f14c4754a938dd2ac67002aa",
            "c16eb49089ee4cbfb42b58b09c529608",
            "be6a7355b7be437586bb693fecae802d",
            "4969a761d9f34a62b5502683ad93a723",
            "f39aa003b8b243efa9992302d9c39ecb",
            "6556302f910349f7aea105bcc4a4e516",
            "819d6c6459fc4a71bf37cce6daf4cd04",
            "6a5b78e5f5834238b48b3b893d1a797c",
            "d2acc67c459840c5872a0aed088a673a",
            "b22bd6adc4944ac2a8223d1fc54e278e",
            "9eed2253e2794b96b078174c26f1599e",
            "9c12caeb617e4a079bb4960698a9addf",
            "f96eeb6fb26243a0988eed5281101c36",
            "543c6458885e4c92911ac85a6bceff99",
            "be795953cbe942d891d3e2e1a857b1c7",
            "cffa853377644f2f8a8bba31795146a0",
            "08a68525c0174d77b8fa1f7a16b37f3d",
            "e5f8df883014424997370c858ca75119",
            "980b8f08ae1f441ba4fcd9fec7bba172",
            "fb399b6ccf14488e8f3a5ee25ba1f519",
            "9689a4b5fc3443c988355d51fb8b31dd",
            "07c7545951bd45838b37656a7d55c4f5",
            "375f551994f141a197d717059513a15a",
            "f8e89fd727a747d1a205f0ea3452a94a",
            "b0d6e4b6564c46348efe5ee5e24a5a36",
            "ab1c746c9a414d16a11db2ecad0797ab",
            "7297eacf7ca34b3bba0029375b8c811a",
            "0075b2bdafa04862b6a871356351dfa1",
            "64db601a4cca46a882e32f771c3dba3d",
            "80946e3d99314550b777b8a62ffe0521",
            "f810aade9a2943aaa5949f30774ab26f",
            "ea5a3b55a5bc450aa0f92c88625778f8",
            "ba21fc338bfc4269bef02e50c0711b7c",
            "5a7c1b813c5b4ceea534f74765c5e605",
            "894779b9a1384b668593ae969055c32c",
            "75d1b0ba8e7048cfb30e507ec3f24ff8",
            "59dda900f0894c54b6ce7302e761ad43",
            "bd87f489e77d44d9b1e066f16c0ca9ed",
            "4e0bd61e94594f5db415e7d5315de7c0",
            "c239ef650cb54478825730509e2a1bd2",
            "ee1503ea36ab41d2a48155fbe442d909",
            "8798e6b2ae124152bb6cca66b14c0d3a",
            "ba5e2e03809a428796f8905392eae113",
            "27067ff09cbe4ad091ffdd18ea30bc17",
            "d722357cd295474290b5dc0ebd704f4c",
            "d89ed637839f4c9abd4268f3d71f3eab",
            "63c52b32dbae41b4a82020cbf030ddb4",
            "3adaba744a5740318b2cf4188429acc0",
            "cc0fc21f8f2d43b4859f1886a33cf0d9",
            "d696138c3379401fae995f49d0d4cd48",
            "e291c590d5ad4f62b29f65a451fdea09",
            "f42a6bf637f846659b0f7d7d198e6eb4",
            "4a74fa41f5d8471e8db6f79ee06d88a1",
            "df3a741ce3334174a1ba1c185bcf7e4d",
            "27e51c8b4ed54557987dc6362be11609",
            "af075877b5864bd3b6f98b13695f1e0d",
            "b8935da2360a4a8f8be0f1cf12049704",
            "0007b3003cf94edfbe879278e68ad521",
            "91d456c10ac14ddda54da8f6b0d6c45a",
            "304f170991ee4e538908b36bc5ee5c71",
            "e1cebdb893b64ef0ac977255e216143a",
            "b7327644f1a14c9f9b620747902501f1",
            "ef7f978170c746f09aaa8f1450c02548",
            "757581a0b5ca4ab5af853223f95e21e4",
            "ace6230a55254740919ad2ee94309bff",
            "569e73779e594bc0acb72bdcec240c26",
            "506eec7a17cd4657a36d3970c81689b0",
            "d0cb73af415c49f8887fe530cb58e218",
            "b285870fcfd04f13a064b4f41bcdc15c",
            "3b63a5eb40bc46dbb59ddf0dab56f961",
            "39abfc34a8de449ab918374cd617983d",
            "6dbd7ac42eae437092baca2fcc660b50",
            "31434f024801490aa52a110bb0814510",
            "61f56526d84e409d879cfd9935eb5084",
            "ffdf2ed2b8d2412997a9845a85d1f4ab",
            "5aa4451625f04032a62b35829b5cbfa3",
            "e8202c4e66814d07bf4e7e980e930a90",
            "a54eef1414914da0981e3037d19e79bd",
            "8dcd120dfe98453b9f4e9cb145696940",
            "1741b6afdc08426ea2f7058457413999",
            "c87cf21dbf074c699c1c57aa604940e6",
            "3dec68173930485d82b3a0d53deda040",
            "320ea31c60b34983bf6b9c1a6d258eeb",
            "1d7d3a59a8cc4e03825aebbd9fa8bdb6",
            "07215f1e31c44c8893b096808be4d96e",
            "438a2b975062455fb5fc06eb3cdaf625",
            "0445438e0b9c4ac680f5e82183708492",
            "5d0f54bb9df64b8786538424532d6397",
            "a325f3831f664a0c96061d16e1d9c358",
            "aac58743d8d64b8591abe9d421a43565",
            "aa49127f1f4d4b168a5d5b69dc7fed0d",
            "fe79e4d23eb64d82bd818ac4c369303a",
            "3d60222523434c1fbc1647b104d2af20",
            "44dba6a745254c3ab17a76ba184b4ecd",
            "d530476ab4934024a5753e5a520d7772",
            "6bc1fefb830a4636b06a0f692bf7d71b"
          ]
        },
        "id": "oED-a0bsb8ap",
        "outputId": "b894e758-5c3d-4a9d-8793-36a9bf77bbb1"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "dca4feb8999943458c52402e4821fd9b"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "config_sentence_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "d02d64dc3c9a499491861bef90086d82"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "README.md:   0%|          | 0.00/4.13k [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "9ed3dd803a884c52aff984be748b0fa2"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "d2acc67c459840c5872a0aed088a673a"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "config.json:   0%|          | 0.00/723 [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "fb399b6ccf14488e8f3a5ee25ba1f519"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "model.safetensors:   0%|          | 0.00/1.11G [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "f810aade9a2943aaa5949f30774ab26f"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "tokenizer_config.json:   0%|          | 0.00/402 [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "8798e6b2ae124152bb6cca66b14c0d3a"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "4a74fa41f5d8471e8db6f79ee06d88a1"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "tokenizer.json:   0%|          | 0.00/9.08M [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "757581a0b5ca4ab5af853223f95e21e4"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "ffdf2ed2b8d2412997a9845a85d1f4ab"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "1_Pooling%2Fconfig.json:   0%|          | 0.00/190 [00:00<?, ?B/s]"
            ],
            "application/vnd.jupyter.widget-view+json": {
              "version_major": 2,
              "version_minor": 0,
              "model_id": "438a2b975062455fb5fc06eb3cdaf625"
            }
          },
          "metadata": {}
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "                           Как приготовить борщ?  Рецепт вкусного борща  \\\n",
            "Как приготовить борщ?                   1.000000               0.809563   \n",
            "Рецепт вкусного борща                   0.809563               1.000000   \n",
            "Где найти рецепт лазаньи?               0.617756               0.644937   \n",
            "Как сварить суп?                        0.768549               0.594278   \n",
            "\n",
            "                           Где найти рецепт лазаньи?  Как сварить суп?  \n",
            "Как приготовить борщ?                       0.617756          0.768549  \n",
            "Рецепт вкусного борща                       0.644937          0.594278  \n",
            "Где найти рецепт лазаньи?                   1.000000          0.557159  \n",
            "Как сварить суп?                            0.557159          1.000000  \n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "df.head()"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 175
        },
        "id": "VmZEvdLecCHc",
        "outputId": "0f175449-e624-4c5b-b640-e855fd1f62a4"
      },
      "execution_count": null,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "                           Как приготовить борщ?  Рецепт вкусного борща  \\\n",
              "Как приготовить борщ?                   1.000000               0.809563   \n",
              "Рецепт вкусного борща                   0.809563               1.000000   \n",
              "Где найти рецепт лазаньи?               0.617756               0.644937   \n",
              "Как сварить суп?                        0.768549               0.594278   \n",
              "\n",
              "                           Где найти рецепт лазаньи?  Как сварить суп?  \n",
              "Как приготовить борщ?                       0.617756          0.768549  \n",
              "Рецепт вкусного борща                       0.644937          0.594278  \n",
              "Где найти рецепт лазаньи?                   1.000000          0.557159  \n",
              "Как сварить суп?                            0.557159          1.000000  "
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-d402c3e3-fea7-4cab-89de-f8f9c89e2b36\" class=\"colab-df-container\">\n",
              "    <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>Как приготовить борщ?</th>\n",
              "      <th>Рецепт вкусного борща</th>\n",
              "      <th>Где найти рецепт лазаньи?</th>\n",
              "      <th>Как сварить суп?</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>Как приготовить борщ?</th>\n",
              "      <td>1.000000</td>\n",
              "      <td>0.809563</td>\n",
              "      <td>0.617756</td>\n",
              "      <td>0.768549</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>Рецепт вкусного борща</th>\n",
              "      <td>0.809563</td>\n",
              "      <td>1.000000</td>\n",
              "      <td>0.644937</td>\n",
              "      <td>0.594278</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>Где найти рецепт лазаньи?</th>\n",
              "      <td>0.617756</td>\n",
              "      <td>0.644937</td>\n",
              "      <td>1.000000</td>\n",
              "      <td>0.557159</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>Как сварить суп?</th>\n",
              "      <td>0.768549</td>\n",
              "      <td>0.594278</td>\n",
              "      <td>0.557159</td>\n",
              "      <td>1.000000</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "    <div class=\"colab-df-buttons\">\n",
              "\n",
              "  <div class=\"colab-df-container\">\n",
              "    <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-d402c3e3-fea7-4cab-89de-f8f9c89e2b36')\"\n",
              "            title=\"Convert this dataframe to an interactive table.\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
              "    <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
              "  </svg>\n",
              "    </button>\n",
              "\n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    .colab-df-buttons div {\n",
              "      margin-bottom: 4px;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "    <script>\n",
              "      const buttonEl =\n",
              "        document.querySelector('#df-d402c3e3-fea7-4cab-89de-f8f9c89e2b36 button.colab-df-convert');\n",
              "      buttonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "      async function convertToInteractive(key) {\n",
              "        const element = document.querySelector('#df-d402c3e3-fea7-4cab-89de-f8f9c89e2b36');\n",
              "        const dataTable =\n",
              "          await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                    [key], {});\n",
              "        if (!dataTable) return;\n",
              "\n",
              "        const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "          '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "          + ' to learn more about interactive tables.';\n",
              "        element.innerHTML = '';\n",
              "        dataTable['output_type'] = 'display_data';\n",
              "        await google.colab.output.renderOutput(dataTable, element);\n",
              "        const docLink = document.createElement('div');\n",
              "        docLink.innerHTML = docLinkHtml;\n",
              "        element.appendChild(docLink);\n",
              "      }\n",
              "    </script>\n",
              "  </div>\n",
              "\n",
              "\n",
              "<div id=\"df-0c926298-4792-4163-a178-6dfea498c374\">\n",
              "  <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-0c926298-4792-4163-a178-6dfea498c374')\"\n",
              "            title=\"Suggest charts\"\n",
              "            style=\"display:none;\">\n",
              "\n",
              "<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "     width=\"24px\">\n",
              "    <g>\n",
              "        <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
              "    </g>\n",
              "</svg>\n",
              "  </button>\n",
              "\n",
              "<style>\n",
              "  .colab-df-quickchart {\n",
              "      --bg-color: #E8F0FE;\n",
              "      --fill-color: #1967D2;\n",
              "      --hover-bg-color: #E2EBFA;\n",
              "      --hover-fill-color: #174EA6;\n",
              "      --disabled-fill-color: #AAA;\n",
              "      --disabled-bg-color: #DDD;\n",
              "  }\n",
              "\n",
              "  [theme=dark] .colab-df-quickchart {\n",
              "      --bg-color: #3B4455;\n",
              "      --fill-color: #D2E3FC;\n",
              "      --hover-bg-color: #434B5C;\n",
              "      --hover-fill-color: #FFFFFF;\n",
              "      --disabled-bg-color: #3B4455;\n",
              "      --disabled-fill-color: #666;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart {\n",
              "    background-color: var(--bg-color);\n",
              "    border: none;\n",
              "    border-radius: 50%;\n",
              "    cursor: pointer;\n",
              "    display: none;\n",
              "    fill: var(--fill-color);\n",
              "    height: 32px;\n",
              "    padding: 0;\n",
              "    width: 32px;\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart:hover {\n",
              "    background-color: var(--hover-bg-color);\n",
              "    box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "    fill: var(--button-hover-fill-color);\n",
              "  }\n",
              "\n",
              "  .colab-df-quickchart-complete:disabled,\n",
              "  .colab-df-quickchart-complete:disabled:hover {\n",
              "    background-color: var(--disabled-bg-color);\n",
              "    fill: var(--disabled-fill-color);\n",
              "    box-shadow: none;\n",
              "  }\n",
              "\n",
              "  .colab-df-spinner {\n",
              "    border: 2px solid var(--fill-color);\n",
              "    border-color: transparent;\n",
              "    border-bottom-color: var(--fill-color);\n",
              "    animation:\n",
              "      spin 1s steps(1) infinite;\n",
              "  }\n",
              "\n",
              "  @keyframes spin {\n",
              "    0% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "      border-left-color: var(--fill-color);\n",
              "    }\n",
              "    20% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    30% {\n",
              "      border-color: transparent;\n",
              "      border-left-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    40% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-top-color: var(--fill-color);\n",
              "    }\n",
              "    60% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "    }\n",
              "    80% {\n",
              "      border-color: transparent;\n",
              "      border-right-color: var(--fill-color);\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "    90% {\n",
              "      border-color: transparent;\n",
              "      border-bottom-color: var(--fill-color);\n",
              "    }\n",
              "  }\n",
              "</style>\n",
              "\n",
              "  <script>\n",
              "    async function quickchart(key) {\n",
              "      const quickchartButtonEl =\n",
              "        document.querySelector('#' + key + ' button');\n",
              "      quickchartButtonEl.disabled = true;  // To prevent multiple clicks.\n",
              "      quickchartButtonEl.classList.add('colab-df-spinner');\n",
              "      try {\n",
              "        const charts = await google.colab.kernel.invokeFunction(\n",
              "            'suggestCharts', [key], {});\n",
              "      } catch (error) {\n",
              "        console.error('Error during call to suggestCharts:', error);\n",
              "      }\n",
              "      quickchartButtonEl.classList.remove('colab-df-spinner');\n",
              "      quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
              "    }\n",
              "    (() => {\n",
              "      let quickchartButtonEl =\n",
              "        document.querySelector('#df-0c926298-4792-4163-a178-6dfea498c374 button');\n",
              "      quickchartButtonEl.style.display =\n",
              "        google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "    })();\n",
              "  </script>\n",
              "</div>\n",
              "    </div>\n",
              "  </div>\n"
            ],
            "application/vnd.google.colaboratory.intrinsic+json": {
              "type": "dataframe",
              "variable_name": "df",
              "summary": "{\n  \"name\": \"df\",\n  \"rows\": 4,\n  \"fields\": [\n    {\n      \"column\": \"\\u041a\\u0430\\u043a \\u043f\\u0440\\u0438\\u0433\\u043e\\u0442\\u043e\\u0432\\u0438\\u0442\\u044c \\u0431\\u043e\\u0440\\u0449?\",\n      \"properties\": {\n        \"dtype\": \"float32\",\n        \"num_unique_values\": 4,\n        \"samples\": [\n          0.8095631003379822,\n          0.7685485482215881,\n          0.9999996423721313\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"\\u0420\\u0435\\u0446\\u0435\\u043f\\u0442 \\u0432\\u043a\\u0443\\u0441\\u043d\\u043e\\u0433\\u043e \\u0431\\u043e\\u0440\\u0449\\u0430\",\n      \"properties\": {\n        \"dtype\": \"float32\",\n        \"num_unique_values\": 4,\n        \"samples\": [\n          0.9999998807907104,\n          0.5942782163619995,\n          0.8095631003379822\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"\\u0413\\u0434\\u0435 \\u043d\\u0430\\u0439\\u0442\\u0438 \\u0440\\u0435\\u0446\\u0435\\u043f\\u0442 \\u043b\\u0430\\u0437\\u0430\\u043d\\u044c\\u0438?\",\n      \"properties\": {\n        \"dtype\": \"float32\",\n        \"num_unique_values\": 4,\n        \"samples\": [\n          0.6449370980262756,\n          0.5571587085723877,\n          0.6177558302879333\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    },\n    {\n      \"column\": \"\\u041a\\u0430\\u043a \\u0441\\u0432\\u0430\\u0440\\u0438\\u0442\\u044c \\u0441\\u0443\\u043f?\",\n      \"properties\": {\n        \"dtype\": \"float32\",\n        \"num_unique_values\": 4,\n        \"samples\": [\n          0.5942782163619995,\n          1.000000238418579,\n          0.7685485482215881\n        ],\n        \"semantic_type\": \"\",\n        \"description\": \"\"\n      }\n    }\n  ]\n}"
            }
          },
          "metadata": {},
          "execution_count": 3
        }
      ]
    },
    {
      "cell_type": "markdown",
      "source": [
        "# rubert (semi-final version; sentiment analysys/text classification)"
      ],
      "metadata": {
        "id": "u9NimG14vOGS"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "import pandas as pd\n",
        "from transformers import BertTokenizer, BertForSequenceClassification\n",
        "from torch.utils.data import DataLoader, Dataset\n",
        "import torch\n",
        "from sklearn.model_selection import train_test_split\n",
        "from sklearn.metrics import f1_score"
      ],
      "metadata": {
        "id": "gQCIOMBERGSm"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "train=pd.read_csv(r'C:\\VisualCode\\T1\\tr.csv', engine='python', encoding='utf-8', on_bad_lines=\"skip\")\n",
        "test=pd.read_csv(r'C:\\VisualCode\\T1\\ts.csv', engine='python', encoding='utf-8', on_bad_lines = \"skip\")\n",
        "\n",
        "# Разделение данных на признаки и метки\n",
        "X_train, X_val, y_train, y_val = train_test_split(train['review'], train['sentiment'], test_size=0.2, random_state=42)"
      ],
      "metadata": {
        "id": "-QcGnSdXRLV1"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "class ReviewsDataset(Dataset):\n",
        "    def __init__(self, reviews, labels=None, tokenizer=None, max_len=128):\n",
        "        self.reviews = reviews\n",
        "        self.labels = labels\n",
        "        self.tokenizer = tokenizer\n",
        "        self.max_len = max_len\n",
        "\n",
        "    def __len__(self):\n",
        "        return len(self.reviews)\n",
        "\n",
        "    def __getitem__(self, idx):\n",
        "        review = self.reviews.iloc[idx]\n",
        "        inputs = self.tokenizer.encode_plus(\n",
        "            review,\n",
        "            add_special_tokens=True,\n",
        "            max_length=self.max_len,\n",
        "            truncation=True,\n",
        "            padding='max_length',\n",
        "            return_tensors='pt'\n",
        "        )\n",
        "        item = {key: val.squeeze(0) for key, val in inputs.items()}\n",
        "        if self.labels is not None:\n",
        "            item['labels'] = torch.tensor(self.labels.iloc[idx], dtype=torch.long)\n",
        "        return item\n"
      ],
      "metadata": {
        "id": "qIaQzApYRRv1"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "tokenizer = BertTokenizer.from_pretrained(\"DeepPavlov/rubert-base-cased\")\n",
        "model = BertForSequenceClassification.from_pretrained(\n",
        "    \"DeepPavlov/rubert-base-cased\",\n",
        "    num_labels=3  # У нас три класса: 0, 1, 2\n",
        ")"
      ],
      "metadata": {
        "id": "qvgwDYwURVWX"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "train_dataset = ReviewsDataset(X_train, y_train, tokenizer)\n",
        "val_dataset = ReviewsDataset(X_val, y_val, tokenizer)\n",
        "test_dataset = ReviewsDataset(test['review'], tokenizer=tokenizer)\n",
        "\n",
        "train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)\n",
        "val_loader = DataLoader(val_dataset, batch_size=16)\n",
        "test_loader = DataLoader(test_dataset, batch_size=16)"
      ],
      "metadata": {
        "id": "JvYRnhONRYcD"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "from transformers import AdamW\n",
        "\n",
        "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
        "model = model.to(device)\n",
        "\n",
        "optimizer = AdamW(model.parameters(), lr=2e-5)\n",
        "\n",
        "# Функция обучения\n",
        "def train_epoch(model, data_loader, optimizer, device):\n",
        "    model.train()\n",
        "    total_loss = 0\n",
        "    for batch in data_loader:\n",
        "        inputs = {key: val.to(device) for key, val in batch.items() if key != 'labels'}\n",
        "        labels = batch['labels'].to(device)\n",
        "\n",
        "        outputs = model(**inputs, labels=labels)\n",
        "        loss = outputs.loss\n",
        "        total_loss += loss.item()\n",
        "\n",
        "        loss.backward()\n",
        "        optimizer.step()\n",
        "        optimizer.zero_grad()\n",
        "    return total_loss / len(data_loader)"
      ],
      "metadata": {
        "id": "jiY9w624RZ1_"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "import time\n",
        "\n",
        "epochs = 3  # Можно увеличить для лучшего результата\n",
        "\n",
        "for epoch in range(epochs):\n",
        "    start_time = time.time()\n",
        "\n",
        "    train_loss = train_epoch(model, train_loader, optimizer, device)\n",
        "\n",
        "    end_time = time.time()\n",
        "    epoch_duration = end_time - start_time\n",
        "\n",
        "    hours = int(epoch_duration // 3600)\n",
        "    minutes = int((epoch_duration % 3600) // 60)\n",
        "    seconds = int(epoch_duration % 60)\n",
        "\n",
        "    torch.save(model.state_dict(), f\"model_epoch_{epoch+1}.pth\")\n",
        "    print(f\"Эпоха {epoch + 1}/{3} завершена.\")\n",
        "    print(f\"Потеря: {train_loss:.4f}, Время выполнения: {epoch_duration:.2f} секунд.\")\n"
      ],
      "metadata": {
        "id": "PumWrG3ZRb-p"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "def predict(model, data_loader, device):\n",
        "    model.eval()\n",
        "    predictions = []\n",
        "    with torch.no_grad():\n",
        "        for batch in data_loader:\n",
        "            inputs = {key: val.to(device) for key, val in batch.items()}\n",
        "            outputs = model(**inputs)\n",
        "            logits = outputs.logits\n",
        "            preds = torch.argmax(logits, dim=1).cpu().numpy()\n",
        "            predictions.extend(preds)\n",
        "    return predictions\n",
        "\n",
        "# Предсказания\n",
        "test['sentiment'] = predict(model, test_loader, device)\n"
      ],
      "metadata": {
        "id": "rpg4htSyRfGX"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "ans = test[['index', 'sentiment']]\n",
        "ans.head(3)\n",
        "ans.to_csv('poputka_0.csv', index = False)"
      ],
      "metadata": {
        "id": "euO3RuywRhVF"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [
        "# RuBert + Distilbert (for classification and sentiment analysys)"
      ],
      "metadata": {
        "id": "MSgsUE3a7F6V"
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from transformers import RobertaTokenizer, RobertaForSequenceClassification\n",
        "from datasets import load_dataset\n",
        "from transformers import Trainer, TrainingArguments\n",
        "import torch\n",
        "\n",
        "# Загрузка данных\n",
        "train_dataset = load_dataset('csv', data_files='train.csv')['train']\n",
        "test_dataset = load_dataset('csv', data_files='test.csv')['test']\n",
        "\n",
        "# Токенизация\n",
        "tokenizer = RobertaTokenizer.from_pretrained('DeepPavlov/rubert-base-cased')\n",
        "\n",
        "def tokenize_function(examples):\n",
        "    return tokenizer(examples['text'], padding='max_length', truncation=True)\n",
        "\n",
        "train_dataset = train_dataset.map(tokenize_function, batched=True)\n",
        "test_dataset = test_dataset.map(tokenize_function, batched=True)\n",
        "\n",
        "# Загрузка модели для классификации текста\n",
        "model = RobertaForSequenceClassification.from_pretrained('DeepPavlov/rubert-base-cased', num_labels=3)  # например, 3 класса\n",
        "\n",
        "# Аргументы для тренировки\n",
        "training_args = TrainingArguments(\n",
        "    output_dir='./results',\n",
        "    num_train_epochs=3,\n",
        "    per_device_train_batch_size=8,\n",
        "    per_device_eval_batch_size=16,\n",
        "    evaluation_strategy=\"epoch\",\n",
        "    logging_dir='./logs',\n",
        ")\n",
        "\n",
        "# Создаем Trainer\n",
        "trainer = Trainer(\n",
        "    model=model,\n",
        "    args=training_args,\n",
        "    train_dataset=train_dataset,\n",
        "    eval_dataset=test_dataset,\n",
        ")\n",
        "\n",
        "# Обучаем модель\n",
        "trainer.train()\n",
        "\n",
        "# Получаем предсказания\n",
        "predictions = trainer.predict(test_dataset)\n",
        "pred_labels = torch.argmax(predictions.predictions, axis=1)\n",
        "print(pred_labels)\n"
      ],
      "metadata": {
        "id": "vm4LZTDAJ3R9"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "from transformers import DistilBertTokenizer, DistilBertForSequenceClassification\n",
        "from transformers import Trainer, TrainingArguments\n",
        "from datasets import load_dataset\n",
        "import torch\n",
        "\n",
        "# Загружаем датасет\n",
        "train_dataset = load_dataset('csv', data_files='train.csv')['train']\n",
        "test_dataset = load_dataset('csv', data_files='test.csv')['test']\n",
        "\n",
        "# Токенизация\n",
        "tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')\n",
        "\n",
        "def tokenize_function(examples):\n",
        "    return tokenizer(examples['text'], padding='max_length', truncation=True)\n",
        "\n",
        "train_dataset = train_dataset.map(tokenize_function, batched=True)\n",
        "test_dataset = test_dataset.map(tokenize_function, batched=True)\n",
        "\n",
        "# Загружаем модель для классификации текста\n",
        "model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=3)  # 3 класса для примера\n",
        "\n",
        "# Аргументы для тренировки\n",
        "training_args = TrainingArguments(\n",
        "    output_dir='./results',\n",
        "    num_train_epochs=3,\n",
        "    per_device_train_batch_size=8,\n",
        "    per_device_eval_batch_size=16,\n",
        "    evaluation_strategy=\"epoch\",\n",
        "    logging_dir='./logs',\n",
        ")\n",
        "\n",
        "# Создаем Trainer\n",
        "trainer = Trainer(\n",
        "    model=model,\n",
        "    args=training_args,\n",
        "    train_dataset=train_dataset,\n",
        "    eval_dataset=test_dataset,\n",
        ")\n",
        "\n",
        "# Обучаем модель\n",
        "trainer.train()\n",
        "\n",
        "# Получаем предсказания\n",
        "predictions = trainer.predict(test_dataset)\n",
        "pred_labels = torch.argmax(predictions.predictions, axis=1)\n",
        "print(pred_labels)\n"
      ],
      "metadata": {
        "id": "3ahKB4_NP4zY"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
}