{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "gpuType": "T4" }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "accelerator": "GPU", "widgets": { "application/vnd.jupyter.widget-state+json": { "5479c909ae014cb4af686f98dfd896cb": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_77cd1a0126ea48f6bc358c52b5b22f3e", "IPY_MODEL_452ca86ad5f44319a93031962f10fedf", "IPY_MODEL_fa32534d4cf349d2a1efac6496277d2e" ], "layout": "IPY_MODEL_cb16cc0f8c6847cda083dc0b2f186083" } }, "77cd1a0126ea48f6bc358c52b5b22f3e": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_3b59793ca26745ea8b3780e07447a56c", "placeholder": "​", "style": "IPY_MODEL_bf13f411d2db420fb25d13d212ba257a", "value": "Map: 100%" } }, "452ca86ad5f44319a93031962f10fedf": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_075f0860a2ca452c8b1438aeae8e3d18", "max": 119, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_555d876e65c9419b82633841914086d9", "value": 119 } }, "fa32534d4cf349d2a1efac6496277d2e": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_e07fb7d992e04473ad7bac9248e7aa75", "placeholder": "​", "style": "IPY_MODEL_4cf7dbf2cb6d4581a356970de1996ec6", "value": " 119/119 [00:00<00:00, 1144.30 examples/s]" } }, "cb16cc0f8c6847cda083dc0b2f186083": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "3b59793ca26745ea8b3780e07447a56c": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "bf13f411d2db420fb25d13d212ba257a": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "075f0860a2ca452c8b1438aeae8e3d18": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "555d876e65c9419b82633841914086d9": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "e07fb7d992e04473ad7bac9248e7aa75": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "4cf7dbf2cb6d4581a356970de1996ec6": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "f13451f32d11428c97243b2bc5b5268c": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_02da0b8c8e564c05a91e5dd3b7e1c9cb", "IPY_MODEL_43e70aa073504a4496e543fd923b7b0f", "IPY_MODEL_7056a43eec3a4718b160360ce9493409" ], "layout": "IPY_MODEL_348abc5e3efe4fd69aee2329d91c17da" } }, "02da0b8c8e564c05a91e5dd3b7e1c9cb": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_4ededf6d88554f5495f76521f8be98d5", "placeholder": "​", "style": "IPY_MODEL_46012658060c41e2a2af258e4025499e", "value": "model.safetensors: 100%" } }, "43e70aa073504a4496e543fd923b7b0f": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_7c1de009823e492fb9a1648b393b449c", "max": 538090408, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_c4af106fdac144d3b3010ceffc63b4d7", "value": 538090408 } }, "7056a43eec3a4718b160360ce9493409": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_b9b5a37d22644cacbc3e81a1682c5184", "placeholder": "​", "style": "IPY_MODEL_390ace0ab34e4727824de62d724767a5", "value": " 538M/538M [00:11<00:00, 52.9MB/s]" } }, "348abc5e3efe4fd69aee2329d91c17da": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "4ededf6d88554f5495f76521f8be98d5": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "46012658060c41e2a2af258e4025499e": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "7c1de009823e492fb9a1648b393b449c": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "c4af106fdac144d3b3010ceffc63b4d7": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "b9b5a37d22644cacbc3e81a1682c5184": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "390ace0ab34e4727824de62d724767a5": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "d2eaa596146742e4937f7b1ec7320dbb": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_34f9fa16d57247829d00de248eb0e1ae", "IPY_MODEL_d40e8b6c471e4f4da2f7521955155955", "IPY_MODEL_82fd1f3b07f24c4c8a89454477ab6b97" ], "layout": "IPY_MODEL_b5b509b3f7bc4ad4b994c68317a07228" } }, "34f9fa16d57247829d00de248eb0e1ae": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_e27f78d5d126483cb7be7c9e69913954", "placeholder": "​", "style": "IPY_MODEL_9b2a21eb504d49df8cbfd2d6bff0ddbf", "value": "Upload 2 LFS files: 100%" } }, "d40e8b6c471e4f4da2f7521955155955": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_43985ed44fc14c0dad5cf7cff9a47d40", "max": 2, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_59115af19cbe4420b6b1c12bf406f6ed", "value": 2 } }, "82fd1f3b07f24c4c8a89454477ab6b97": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_e5b320112ff346529e6a936eab29d95d", "placeholder": "​", "style": "IPY_MODEL_4a78c564e91c4f048778f65e4905b22a", "value": " 2/2 [00:11<00:00, 11.77s/it]" } }, "b5b509b3f7bc4ad4b994c68317a07228": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "e27f78d5d126483cb7be7c9e69913954": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "9b2a21eb504d49df8cbfd2d6bff0ddbf": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "43985ed44fc14c0dad5cf7cff9a47d40": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "59115af19cbe4420b6b1c12bf406f6ed": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "e5b320112ff346529e6a936eab29d95d": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "4a78c564e91c4f048778f65e4905b22a": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "86b0050f0417457784489f96c1313b4a": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_9196c063ec4741fcbc40b3c77eadc81e", "IPY_MODEL_b10f037ce64143cbbf713148a46c2a0d", "IPY_MODEL_338fcb1a304341759ce1d53093b48c8e" ], "layout": "IPY_MODEL_461f8d72aa934cb499b8c09031682a6d" } }, "9196c063ec4741fcbc40b3c77eadc81e": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_82e726ec9ba24dd79c3b02388634d08d", "placeholder": "​", "style": "IPY_MODEL_9a4d24decc8e4bf0a066bbd5a968c3b2", "value": "training_args.bin: 100%" } }, "b10f037ce64143cbbf713148a46c2a0d": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_9c08523e49844618953ab8dd28b9f6fd", "max": 5624, "min": 0, "orientation": "horizontal", "style": "IPY_MODEL_d578d568fd4f4ac6aba0b5de3d1a2c1d", "value": 5624 } }, "338fcb1a304341759ce1d53093b48c8e": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "model_module_version": "1.5.0", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_60f3ec480a6549908acc753e67b9c3f4", "placeholder": "​", "style": "IPY_MODEL_67cd4b245e02485b82f422c187cec1ae", "value": " 5.62k/5.62k [00:00<00:00, 29.0kB/s]" } }, "461f8d72aa934cb499b8c09031682a6d": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "82e726ec9ba24dd79c3b02388634d08d": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "9a4d24decc8e4bf0a066bbd5a968c3b2": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "9c08523e49844618953ab8dd28b9f6fd": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "d578d568fd4f4ac6aba0b5de3d1a2c1d": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "60f3ec480a6549908acc753e67b9c3f4": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "model_module_version": "1.2.0", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "67cd4b245e02485b82f422c187cec1ae": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "model_module_version": "1.5.0", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } } } } }, "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "3W3Z2pWzCxpq", "outputId": "e8a3235d-3433-4882-ad07-ef438ee4b704" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Requirement already satisfied: transformers in /usr/local/lib/python3.10/dist-packages (4.47.1)\n", "Collecting datasets\n", " Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)\n", "Collecting trl\n", " Downloading trl-0.13.0-py3-none-any.whl.metadata (11 kB)\n", "Requirement already satisfied: huggingface_hub in /usr/local/lib/python3.10/dist-packages (0.27.1)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from transformers) (3.16.1)\n", "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (1.26.4)\n", "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from transformers) (24.2)\n", "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (6.0.2)\n", "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.10/dist-packages (from transformers) (2024.11.6)\n", "Requirement already satisfied: requests in /usr/local/lib/python3.10/dist-packages (from transformers) (2.32.3)\n", "Requirement already satisfied: tokenizers<0.22,>=0.21 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.21.0)\n", "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.10/dist-packages (from transformers) (0.5.1)\n", "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.10/dist-packages (from transformers) (4.67.1)\n", "Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets) (17.0.0)\n", "Collecting dill<0.3.9,>=0.3.0 (from datasets)\n", " Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)\n", "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets) (2.2.2)\n", "Collecting xxhash (from datasets)\n", " Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)\n", "Collecting multiprocess<0.70.17 (from datasets)\n", " Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)\n", "Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)\n", " Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)\n", "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets) (3.11.11)\n", "Requirement already satisfied: accelerate>=0.34.0 in /usr/local/lib/python3.10/dist-packages (from trl) (1.2.1)\n", "Requirement already satisfied: rich in /usr/local/lib/python3.10/dist-packages (from trl) (13.9.4)\n", "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface_hub) (4.12.2)\n", "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from accelerate>=0.34.0->trl) (5.9.5)\n", "Requirement already satisfied: torch>=1.10.0 in /usr/local/lib/python3.10/dist-packages (from accelerate>=0.34.0->trl) (2.5.1+cu121)\n", "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (2.4.4)\n", "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.3.2)\n", "Requirement already satisfied: async-timeout<6.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (4.0.3)\n", "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (24.3.0)\n", "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.5.0)\n", "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (6.1.0)\n", "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (0.2.1)\n", "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets) (1.18.3)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.4.1)\n", "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (3.10)\n", "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2.3.0)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests->transformers) (2024.12.14)\n", "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2.8.2)\n", "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2024.2)\n", "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets) (2024.2)\n", "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from rich->trl) (3.0.0)\n", "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.10/dist-packages (from rich->trl) (2.18.0)\n", "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.10/dist-packages (from markdown-it-py>=2.2.0->rich->trl) (0.1.2)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.10/dist-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.17.0)\n", "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate>=0.34.0->trl) (3.4.2)\n", "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate>=0.34.0->trl) (3.1.5)\n", "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate>=0.34.0->trl) (1.13.1)\n", "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy==1.13.1->torch>=1.10.0->accelerate>=0.34.0->trl) (1.3.0)\n", "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.10.0->accelerate>=0.34.0->trl) (3.0.2)\n", "Downloading datasets-3.2.0-py3-none-any.whl (480 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m480.6/480.6 kB\u001b[0m \u001b[31m15.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading trl-0.13.0-py3-none-any.whl (293 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m293.4/293.4 kB\u001b[0m \u001b[31m25.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m10.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (179 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m179.3/179.3 kB\u001b[0m \u001b[31m17.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading multiprocess-0.70.16-py310-none-any.whl (134 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m11.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hDownloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n", "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m18.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", "\u001b[?25hInstalling collected packages: xxhash, fsspec, dill, multiprocess, datasets, trl\n", " Attempting uninstall: fsspec\n", " Found existing installation: fsspec 2024.10.0\n", " Uninstalling fsspec-2024.10.0:\n", " Successfully uninstalled fsspec-2024.10.0\n", "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n", "gcsfs 2024.10.0 requires fsspec==2024.10.0, but you have fsspec 2024.9.0 which is incompatible.\u001b[0m\u001b[31m\n", "\u001b[0mSuccessfully installed datasets-3.2.0 dill-0.3.8 fsspec-2024.9.0 multiprocess-0.70.16 trl-0.13.0 xxhash-3.5.0\n" ] } ], "source": [ "# Install the requirements in Google Colab\n", "!pip install transformers datasets trl huggingface_hub" ] }, { "cell_type": "code", "source": [ "# Import necessary libraries\n", "from transformers import AutoModelForCausalLM, AutoTokenizer\n", "from datasets import load_dataset\n", "from trl import SFTConfig, SFTTrainer, setup_chat_format, DataCollatorForCompletionOnlyLM\n", "import torch" ], "metadata": { "id": "7iQRJ-YHDCQu" }, "execution_count": 2, "outputs": [] }, { "cell_type": "code", "source": [ "device = (\n", " \"cuda\"\n", " if torch.cuda.is_available()\n", " else \"mps\" if torch.backends.mps.is_available() else \"cpu\"\n", ")" ], "metadata": { "id": "VYETUQkNDIPz" }, "execution_count": 3, "outputs": [] }, { "cell_type": "code", "source": [ "# Load the model and tokenizer\n", "model_name = \"HuggingFaceTB/SmolLM2-135M\"\n", "model = AutoModelForCausalLM.from_pretrained(\n", " pretrained_model_name_or_path=model_name\n", ").to(device)\n", "tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name)" ], "metadata": { "id": "olO-YsF-DMSh" }, "execution_count": 37, "outputs": [] }, { "cell_type": "code", "source": [ "# Set up the chat format\n", "model, tokenizer = setup_chat_format(model=model, tokenizer=tokenizer)" ], "metadata": { "id": "L2H0JHzBDTpm" }, "execution_count": 38, "outputs": [] }, { "cell_type": "code", "source": [ "# Set our name for the finetune to be saved &/ uploaded to\n", "finetune_name = \"SmolLM2-135M-SFT-smoltalk\"\n", "finetune_tags = [\"smol-course\",\"sft_finetuning\"]" ], "metadata": { "id": "YBiQ7YZPDhKe" }, "execution_count": 39, "outputs": [] }, { "cell_type": "code", "source": [ "# Let's test the base model before training\n", "prompt = \"Write a haiku about programming\"\n", "\n", "# Format with template\n", "messages = [{\"role\": \"user\", \"content\": prompt}]\n", "formatted_prompt = tokenizer.apply_chat_template(messages, tokenize=False)\n", "\n", "# Generate response\n", "inputs = tokenizer(formatted_prompt, return_tensors=\"pt\").to(device)\n", "outputs = model.generate(**inputs, max_new_tokens=100)\n", "print(\"Before training:\")\n", "print(tokenizer.decode(outputs[0], skip_special_tokens=True))" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "av7kKBd0DhkZ", "outputId": "27d49f1f-6f0b-4e77-d276-b5b482b8bfff" }, "execution_count": 40, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Before training:\n", "user\n", "Write a haiku about programming\n", "Write a haiku about programming\n", "Write a haiku about programming\n", "Write a haiku about programming\n", "Write a haiku about programming\n", "Write a haiku about programming\n", "Write a haiku about programming\n", "Write a haiku about programming\n", "Write a haiku about programming\n", "Write a haiku about programming\n", "Write a haiku about programming\n", "Write a haiku about programming\n", "Write a haiku about programming\n", "Write a haiku about programming\n", "Write a haiku about programming\n", "Write a\n" ] } ] }, { "cell_type": "code", "source": [ "# Load a sample dataset\n", "from datasets import load_dataset\n", "ds = load_dataset(path=\"HuggingFaceTB/smoltalk\", name=\"everyday-conversations\")\n", "ds" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "qyWuLJDjDmqK", "outputId": "0162aeae-94bf-4d73-ec2a-e53c35d96bb3" }, "execution_count": 41, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "DatasetDict({\n", " train: Dataset({\n", " features: ['full_topic', 'messages'],\n", " num_rows: 2260\n", " })\n", " test: Dataset({\n", " features: ['full_topic', 'messages'],\n", " num_rows: 119\n", " })\n", "})" ] }, "metadata": {}, "execution_count": 41 } ] }, { "cell_type": "code", "source": [ "ds['train'][0]" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "_6D93RBmDxbk", "outputId": "1540bcb9-8191-44cd-b2ee-51abb6d0807a" }, "execution_count": 42, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "{'full_topic': 'Travel/Vacation destinations/Beach resorts',\n", " 'messages': [{'content': 'Hi there', 'role': 'user'},\n", " {'content': 'Hello! How can I help you today?', 'role': 'assistant'},\n", " {'content': \"I'm looking for a beach resort for my next vacation. Can you recommend some popular ones?\",\n", " 'role': 'user'},\n", " {'content': \"Some popular beach resorts include Maui in Hawaii, the Maldives, and the Bahamas. They're known for their beautiful beaches and crystal-clear waters.\",\n", " 'role': 'assistant'},\n", " {'content': 'That sounds great. Are there any resorts in the Caribbean that are good for families?',\n", " 'role': 'user'},\n", " {'content': 'Yes, the Turks and Caicos Islands and Barbados are excellent choices for family-friendly resorts in the Caribbean. They offer a range of activities and amenities suitable for all ages.',\n", " 'role': 'assistant'},\n", " {'content': \"Okay, I'll look into those. Thanks for the recommendations!\",\n", " 'role': 'user'},\n", " {'content': \"You're welcome. I hope you find the perfect resort for your vacation.\",\n", " 'role': 'assistant'}]}" ] }, "metadata": {}, "execution_count": 42 } ] }, { "cell_type": "code", "source": [ "def process_messages(samples):\n", " # Add 'human' role logic\n", " result = []\n", " for x in samples['messages']:\n", " if x[-1]['role'] == 'user': # Add condition for 'human' role\n", " result.append(x)\n", " else:\n", " result.append(x[:-1]) # Truncate the message if condition is not met\n", " return {'messages': result}\n", "\n", "# Applying the function on a dataset\n", "dataset = ds.map(process_messages, batched=True)" ], "metadata": { "id": "cSYoD4Y3FQdu" }, "execution_count": 43, "outputs": [] }, { "cell_type": "code", "source": [ "# Configure the SFTTrainer\n", "sft_config = SFTConfig(\n", " output_dir=\"./sft_output\",\n", " max_steps=500, # Adjust based on dataset size and desired training duration\n", " per_device_train_batch_size=16, # Set according to your GPU memory capacity\n", " learning_rate=5e-5, # Common starting point for fine-tuning\n", " logging_steps=50, # Frequency for finding training metrics\n", " save_steps=50, # Frequency for saving model checkpoints\n", " eval_strategy=\"steps\", # Evaluate the model at regular intervals\n", " eval_steps=50, # Frequency of evaluation\n", " use_mps_device=(\n", " True if device == \"mps\" else False\n", " ), # Use MPS for mixed precision training\n", " hub_model_id=finetune_name, # Set a unique name for your model\n", " report_to=[]\n", ")\n", "\n", "# Initialize the SFTTrainer\n", "trainer = SFTTrainer(\n", " model=model,\n", " args=sft_config,\n", " train_dataset=ds[\"train\"],\n", " processing_class=tokenizer,\n", " eval_dataset=ds[\"test\"],\n", ")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 49, "referenced_widgets": [ "5479c909ae014cb4af686f98dfd896cb", "77cd1a0126ea48f6bc358c52b5b22f3e", "452ca86ad5f44319a93031962f10fedf", "fa32534d4cf349d2a1efac6496277d2e", "cb16cc0f8c6847cda083dc0b2f186083", "3b59793ca26745ea8b3780e07447a56c", "bf13f411d2db420fb25d13d212ba257a", "075f0860a2ca452c8b1438aeae8e3d18", "555d876e65c9419b82633841914086d9", "e07fb7d992e04473ad7bac9248e7aa75", "4cf7dbf2cb6d4581a356970de1996ec6" ] }, "id": "nvtJ2H41JTrU", "outputId": "0a9eaf66-f20d-4a44-d54f-6f6428cab4f1" }, "execution_count": 44, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "Map: 0%| | 0/119 [00:00" ], "text/html": [ "\n", "
\n", " \n", " \n", " [500/500 17:08, Epoch 3/4]\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StepTraining LossValidation Loss
501.2505001.109389
1001.0655001.067429
1501.0156001.044449
2000.8958001.034744
2500.8814001.030149
3000.8621001.029914
3500.7885001.028845
4000.7895001.027438
4500.7670001.030825
5000.7417001.031717

" ] }, "metadata": {} }, { "output_type": "execute_result", "data": { "text/plain": [ "TrainOutput(global_step=500, training_loss=0.9057471542358398, metrics={'train_runtime': 1030.1888, 'train_samples_per_second': 7.766, 'train_steps_per_second': 0.485, 'total_flos': 1302438402256896.0, 'train_loss': 0.9057471542358398, 'epoch': 3.52112676056338})" ] }, "metadata": {}, "execution_count": 45 } ] }, { "cell_type": "code", "source": [ "# Save the model\n", "trainer.save_model(f\"./{finetune_name}\")" ], "metadata": { "id": "NJAdU1QBJfXK" }, "execution_count": 46, "outputs": [] }, { "cell_type": "code", "source": [ "trainer.push_to_hub(tags=finetune_tags)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 200, "referenced_widgets": [ "f13451f32d11428c97243b2bc5b5268c", "02da0b8c8e564c05a91e5dd3b7e1c9cb", "43e70aa073504a4496e543fd923b7b0f", "7056a43eec3a4718b160360ce9493409", "348abc5e3efe4fd69aee2329d91c17da", "4ededf6d88554f5495f76521f8be98d5", "46012658060c41e2a2af258e4025499e", "7c1de009823e492fb9a1648b393b449c", "c4af106fdac144d3b3010ceffc63b4d7", "b9b5a37d22644cacbc3e81a1682c5184", "390ace0ab34e4727824de62d724767a5", "d2eaa596146742e4937f7b1ec7320dbb", "34f9fa16d57247829d00de248eb0e1ae", "d40e8b6c471e4f4da2f7521955155955", "82fd1f3b07f24c4c8a89454477ab6b97", "b5b509b3f7bc4ad4b994c68317a07228", "e27f78d5d126483cb7be7c9e69913954", "9b2a21eb504d49df8cbfd2d6bff0ddbf", "43985ed44fc14c0dad5cf7cff9a47d40", "59115af19cbe4420b6b1c12bf406f6ed", "e5b320112ff346529e6a936eab29d95d", "4a78c564e91c4f048778f65e4905b22a", "86b0050f0417457784489f96c1313b4a", "9196c063ec4741fcbc40b3c77eadc81e", "b10f037ce64143cbbf713148a46c2a0d", "338fcb1a304341759ce1d53093b48c8e", "461f8d72aa934cb499b8c09031682a6d", "82e726ec9ba24dd79c3b02388634d08d", "9a4d24decc8e4bf0a066bbd5a968c3b2", "9c08523e49844618953ab8dd28b9f6fd", "d578d568fd4f4ac6aba0b5de3d1a2c1d", "60f3ec480a6549908acc753e67b9c3f4", "67cd4b245e02485b82f422c187cec1ae" ] }, "id": "yi9aab28Pk-C", "outputId": "6516478f-8af7-4a87-b52d-13128a0399fb" }, "execution_count": 47, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ "model.safetensors: 0%| | 0.00/538M [00:00