{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "name": "Bert-NER-ru", "provenance": [], "collapsed_sections": [] }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.9" }, "widgets": { "application/vnd.jupyter.widget-state+json": { "38effe6811e0445ea6a06fbf62322cb2": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_9c3e627f3f214708a0eada8855e345e1", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_4b4eaf184a9a482798ac08ff5febafb9", "IPY_MODEL_c3566754043543218423794f629214d6", "IPY_MODEL_b92ff60477c043e0b2bdaea88a70295b" ] } }, "9c3e627f3f214708a0eada8855e345e1": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": 
null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "4b4eaf184a9a482798ac08ff5febafb9": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_5e07fb58967a4d03931682646417f324", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": "Downloading: 100%", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_c8847e2fd0654eabb62848c927685ba4" } }, "c3566754043543218423794f629214d6": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_bb59a31387e84bc18937ca0aafb36230", "_dom_classes": [], "description": "", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 341, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 341, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_7acb711b2f994c0c8c292f7735ccfd15" } }, "b92ff60477c043e0b2bdaea88a70295b": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_93c12e9ea0294f70b4dca310239b55e4", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 341/341 [00:00<00:00, 6.59kB/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": 
"@jupyter-widgets/controls", "layout": "IPY_MODEL_44595961fd42457496803732f50e813b" } }, "5e07fb58967a4d03931682646417f324": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "c8847e2fd0654eabb62848c927685ba4": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "bb59a31387e84bc18937ca0aafb36230": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": 
"@jupyter-widgets/controls" } }, "7acb711b2f994c0c8c292f7735ccfd15": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "93c12e9ea0294f70b4dca310239b55e4": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "44595961fd42457496803732f50e813b": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": 
"@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "97db5fbd0b134002a15f1e89b2e2a871": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_4ac3dbe72f9b4dd2b27d91630736fbcc", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_937d10a0f080492ab64efbb53f6ebe83", "IPY_MODEL_3dab395a7da545838fb6c54fc19420cc", "IPY_MODEL_712ed73184ec4551a6891862b3295b22" ] } }, "4ac3dbe72f9b4dd2b27d91630736fbcc": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, 
"align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "937d10a0f080492ab64efbb53f6ebe83": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_5281ff75b7144218a6218c65d8789f48", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": "Downloading: 100%", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_bedec2ca0adc4239b35f94a23a9b3cdd" } }, "3dab395a7da545838fb6c54fc19420cc": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_8263aecd8ead4ceabf6112e536ac6dbd", "_dom_classes": [], "description": "", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 632, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 632, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_e7f42df803cc4ed6ae43ead43e283ed8" } }, "712ed73184ec4551a6891862b3295b22": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_57770139538d40c38c68112f622993f3", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 632/632 
[00:00<00:00, 20.9kB/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_c3caeafc16f44882938d7c1dcb56cb18" } }, "5281ff75b7144218a6218c65d8789f48": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "bedec2ca0adc4239b35f94a23a9b3cdd": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "8263aecd8ead4ceabf6112e536ac6dbd": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", 
"_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "e7f42df803cc4ed6ae43ead43e283ed8": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "57770139538d40c38c68112f622993f3": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "c3caeafc16f44882938d7c1dcb56cb18": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, 
"flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "0389bbf4dde74c4db0117a6f66c8808f": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_bb94580fba5140078f7a4ac289308e6f", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_ff1d724506ef4ab1a73c4a6635a95cdb", "IPY_MODEL_6568a4b7ebba4b66ae324cfb1e4c6c56", "IPY_MODEL_37f3d4726084402ba1ff26773197b415" ] } }, "bb94580fba5140078f7a4ac289308e6f": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, 
"grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "ff1d724506ef4ab1a73c4a6635a95cdb": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_6f02d9d6f80249e4949123b465d56584", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": "Downloading: 100%", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_f5e2e39b8c344f81b40f43c7645b6fae" } }, "6568a4b7ebba4b66ae324cfb1e4c6c56": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_2f8e9faf6afe4b278a4889a13e29fd68", "_dom_classes": [], "description": "", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 241082, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 241082, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_ff12a26f63e643fbb301dffe15f375cf" } }, "37f3d4726084402ba1ff26773197b415": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_2fd0ded25025401d917bc2624b23a5ce", "_dom_classes": [], "description": "", 
"_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 241k/241k [00:00<00:00, 3.11MB/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_fd461fa9ee1249dbb49e10bf7201f9d9" } }, "6f02d9d6f80249e4949123b465d56584": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "f5e2e39b8c344f81b40f43c7645b6fae": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "2f8e9faf6afe4b278a4889a13e29fd68": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", 
"state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "ff12a26f63e643fbb301dffe15f375cf": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "2fd0ded25025401d917bc2624b23a5ce": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "fd461fa9ee1249dbb49e10bf7201f9d9": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, 
"justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "91e9fdb905864efa8d42ee7cb3680e08": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_d8aa99a5f7cb42b9b3ca28edb0b0a007", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_0f4c5d11ce814b83bb85389b3c5a4c5f", "IPY_MODEL_62ae82032a8a403b989ee8e0ab06f58a", "IPY_MODEL_89ccf6c81454461c9a94ee0b9820d4a9" ] } }, "d8aa99a5f7cb42b9b3ca28edb0b0a007": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", 
"top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "0f4c5d11ce814b83bb85389b3c5a4c5f": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_c76403b23ddd4fd5a644e63e21da8358", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": "Downloading: 100%", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_303a6382c4ff45439a40c47e969306da" } }, "62ae82032a8a403b989ee8e0ab06f58a": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_24bff1add1804f5ea46b5edaf4ca7428", "_dom_classes": [], "description": "", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 468145, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 468145, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_6e2ff1b9214049188ac0587fb33b5352" } }, "89ccf6c81454461c9a94ee0b9820d4a9": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { 
"_view_name": "HTMLView", "style": "IPY_MODEL_5f25f5e5ad604408bcb53d0b70f67f20", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 468k/468k [00:00<00:00, 5.63MB/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_76260400334d4c4f8985e69b4800ae28" } }, "c76403b23ddd4fd5a644e63e21da8358": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "303a6382c4ff45439a40c47e969306da": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, 
"24bff1add1804f5ea46b5edaf4ca7428": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "6e2ff1b9214049188ac0587fb33b5352": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "5f25f5e5ad604408bcb53d0b70f67f20": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "76260400334d4c4f8985e69b4800ae28": { "model_module": 
"@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "21fdf7eb6fb94da0bc9639b3f4ea7f00": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_20b1444148dc40f89b107e30dbcf6c0b", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_36dc0d7a19c04a47a06618e187ee894a", "IPY_MODEL_791e009541a64d749330e6123ca7d87f", "IPY_MODEL_06c63accbd2a4903b762ed21545bfbbe" ] } }, "20b1444148dc40f89b107e30dbcf6c0b": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": 
null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "36dc0d7a19c04a47a06618e187ee894a": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_07fee35962004c8996c8acef923292eb", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": "Downloading: 100%", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_b3332e5f66ad4c6c830f28bc290cd4bd" } }, "791e009541a64d749330e6123ca7d87f": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_b831ba8c276b4a1bb0ef7ae16a7a8fc9", "_dom_classes": [], "description": "", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 112, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 112, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_7eff20962dfe422b9523c4e74f5372aa" } }, 
"06c63accbd2a4903b762ed21545bfbbe": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_663c10e13a0e47f7b115ad50bd5b3965", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 112/112 [00:00<00:00, 3.03kB/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_997504803ac5445588c07cf97049d14a" } }, "07fee35962004c8996c8acef923292eb": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "b3332e5f66ad4c6c830f28bc290cd4bd": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, 
"object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "b831ba8c276b4a1bb0ef7ae16a7a8fc9": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "7eff20962dfe422b9523c4e74f5372aa": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "663c10e13a0e47f7b115ad50bd5b3965": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": 
"1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "997504803ac5445588c07cf97049d14a": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "e16827a6f03a4b92889daf18d9917126": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_7b3185264cfe469683ad9cc81b0a8484", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_20ea5f6227b041969b1ce0d686a39121", "IPY_MODEL_78818c7330fd489c8820d845afab2fca", "IPY_MODEL_19c53fdf63a0408e8ad85e56a94d7dcd" ] } }, "7b3185264cfe469683ad9cc81b0a8484": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, 
"_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "20ea5f6227b041969b1ce0d686a39121": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_43d9892c92f343369d714477c706d45a", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": "100%", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_91f00c3eed1b41d288a53cf829f88555" } }, "78818c7330fd489c8820d845afab2fca": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_f2f2a65d8c5d4627855526eccf8c68d7", "_dom_classes": [], "description": "", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 4, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 4, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": 
null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_2c4a9988fa90474ba9aa1f48bf03704a" } }, "19c53fdf63a0408e8ad85e56a94d7dcd": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_ec76c271eccd45c7b8d28a15274b1d50", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 4/4 [00:00<00:00, 6.93ba/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_eefb8e1e66ff486e92c0ce618c12be66" } }, "43d9892c92f343369d714477c706d45a": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "91f00c3eed1b41d288a53cf829f88555": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": 
null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "f2f2a65d8c5d4627855526eccf8c68d7": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "2c4a9988fa90474ba9aa1f48bf03704a": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "ec76c271eccd45c7b8d28a15274b1d50": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": 
"@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "eefb8e1e66ff486e92c0ce618c12be66": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "b27b3845581c4dcba258672ecde20982": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_16091398feea4bfb8e5c07a679934a3e", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_b6f776fb81874175a00f9d4569edf89c", "IPY_MODEL_9ae961550c684fbda20bbec6043ca80e", "IPY_MODEL_54e1fd908fed41c981e8bb39068da20c" ] } }, "16091398feea4bfb8e5c07a679934a3e": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { 
"_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "b6f776fb81874175a00f9d4569edf89c": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_c46c29c17e5d41128bb8bc401c5c4c8c", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": "100%", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_d0470494b6be4bf0af4fc7007e285149" } }, "9ae961550c684fbda20bbec6043ca80e": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_9c406cfcfcf9424181939d2f6009090a", "_dom_classes": [], "description": "", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 1, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 1, "_view_count": null, 
"_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_d2e8a291b9c54a45ba9f812ec6d19fcc" } }, "54e1fd908fed41c981e8bb39068da20c": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_6fb72f595fb14608ae7e6a20c2e410a9", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 1/1 [00:00<00:00, 5.44ba/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_e174a5cbd06441329ccd8cb547b44503" } }, "c46c29c17e5d41128bb8bc401c5c4c8c": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "d0470494b6be4bf0af4fc7007e285149": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": 
null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "9c406cfcfcf9424181939d2f6009090a": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "d2e8a291b9c54a45ba9f812ec6d19fcc": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "6fb72f595fb14608ae7e6a20c2e410a9": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": 
"StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "e174a5cbd06441329ccd8cb547b44503": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "178b2e70a03141c3a8d14c03b1024d34": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_a2a75ec023b64fb0813a0d3b9da549a3", "_model_module": "@jupyter-widgets/controls", "children": [ "IPY_MODEL_6446a516afdf4406a5a8c876aa2a0179", "IPY_MODEL_787dde6c71aa4303a3f1ae908bdf3288", "IPY_MODEL_04e795570e2245e2844e7acfd36611e4" ] } }, 
"a2a75ec023b64fb0813a0d3b9da549a3": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "6446a516afdf4406a5a8c876aa2a0179": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_a0441eab19d441d7ae60ea926a1ddabe", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": "Downloading: 100%", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_00a1bad590e34fc09067414ed4bae1d9" } }, "787dde6c71aa4303a3f1ae908bdf3288": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", "style": "IPY_MODEL_5b3223766ec44a0781e88c6f0d276c12", "_dom_classes": [], "description": "", "_model_name": "FloatProgressModel", "bar_style": 
"success", "max": 47679974, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 47679974, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_085cbbc409874d57b38930b6b05ecfd9" } }, "04e795570e2245e2844e7acfd36611e4": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_1a255ac062e94624a2ecfb4f58889d74", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 47.7M/47.7M [00:01<00:00, 47.3MB/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_1d51207792fd4afb849e2ae72ddd68ce" } }, "a0441eab19d441d7ae60ea926a1ddabe": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "00a1bad590e34fc09067414ed4bae1d9": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": 
null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "5b3223766ec44a0781e88c6f0d276c12": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "085cbbc409874d57b38930b6b05ecfd9": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, 
"1a255ac062e94624a2ecfb4f58889d74": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "1d51207792fd4afb849e2ae72ddd68ce": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "bba28fea430d436981b0bfab06fb4ee6": { "model_module": "@jupyter-widgets/controls", "model_name": "HBoxModel", "state": { "_view_name": "HBoxView", "_dom_classes": [], "_model_name": "HBoxModel", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.5.0", "box_style": "", "layout": "IPY_MODEL_fe16833673ba4dc79001d9b3d28eb6d2", "_model_module": "@jupyter-widgets/controls", "children": [ 
"IPY_MODEL_513e9fa4a8a04f889d68ac7658818465", "IPY_MODEL_b38e9ab8c6fa4ea1952a2265dcdcfff5", "IPY_MODEL_91c93a7307854ac98d9bdf6746286517" ] } }, "fe16833673ba4dc79001d9b3d28eb6d2": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "513e9fa4a8a04f889d68ac7658818465": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_c291de1c9ba343ff8e140a8c7ef1496a", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": "Downloading: ", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_b1e72abd13c84f08b763eae36928fb4e" } }, "b38e9ab8c6fa4ea1952a2265dcdcfff5": { "model_module": "@jupyter-widgets/controls", "model_name": "FloatProgressModel", "state": { "_view_name": "ProgressView", 
"style": "IPY_MODEL_251fc9cb4f574aa4881b722cef11324a", "_dom_classes": [], "description": "", "_model_name": "FloatProgressModel", "bar_style": "success", "max": 2482, "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": 2482, "_view_count": null, "_view_module_version": "1.5.0", "orientation": "horizontal", "min": 0, "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_71c5d3acf8674d77b38d6fa6b0aba8d5" } }, "91c93a7307854ac98d9bdf6746286517": { "model_module": "@jupyter-widgets/controls", "model_name": "HTMLModel", "state": { "_view_name": "HTMLView", "style": "IPY_MODEL_94419c78831745928f8e5327f53ef5e0", "_dom_classes": [], "description": "", "_model_name": "HTMLModel", "placeholder": "​", "_view_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "value": " 6.34k/? [00:00<00:00, 157kB/s]", "_view_count": null, "_view_module_version": "1.5.0", "description_tooltip": null, "_model_module": "@jupyter-widgets/controls", "layout": "IPY_MODEL_31a5bb08896f47309e658dac697b6680" } }, "c291de1c9ba343ff8e140a8c7ef1496a": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "b1e72abd13c84f08b763eae36928fb4e": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": 
null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "251fc9cb4f574aa4881b722cef11324a": { "model_module": "@jupyter-widgets/controls", "model_name": "ProgressStyleModel", "state": { "_view_name": "StyleView", "_model_name": "ProgressStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "bar_color": null, "_model_module": "@jupyter-widgets/controls" } }, "71c5d3acf8674d77b38d6fa6b0aba8d5": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": 
null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } }, "94419c78831745928f8e5327f53ef5e0": { "model_module": "@jupyter-widgets/controls", "model_name": "DescriptionStyleModel", "state": { "_view_name": "StyleView", "_model_name": "DescriptionStyleModel", "description_width": "", "_view_module": "@jupyter-widgets/base", "_model_module_version": "1.5.0", "_view_count": null, "_view_module_version": "1.2.0", "_model_module": "@jupyter-widgets/controls" } }, "31a5bb08896f47309e658dac697b6680": { "model_module": "@jupyter-widgets/base", "model_name": "LayoutModel", "state": { "_view_name": "LayoutView", "grid_template_rows": null, "right": null, "justify_content": null, "_view_module": "@jupyter-widgets/base", "overflow": null, "_model_module_version": "1.2.0", "_view_count": null, "flex_flow": null, "width": null, "min_width": null, "border": null, "align_items": null, "bottom": null, "_model_module": "@jupyter-widgets/base", "top": null, "grid_column": null, "overflow_y": null, "overflow_x": null, "grid_auto_flow": null, "grid_area": null, "grid_template_columns": null, "flex": null, "_model_name": "LayoutModel", "justify_items": null, "grid_row": null, "max_height": null, "align_content": null, "visibility": null, "align_self": null, "height": null, "min_height": null, "padding": null, "grid_auto_rows": null, "grid_gap": null, "max_width": null, "order": null, "_view_module_version": "1.2.0", "grid_template_areas": null, "object_position": null, "object_fit": null, "grid_auto_columns": null, "margin": null, "display": null, "left": null } } } }, "accelerator": "GPU" }, "cells": [ { "cell_type": "markdown", "metadata": { "id": "X4cRE8IbIrIV" }, "source": [ "Основано на блокноте https://github.com/huggingface/notebooks/blob/master/examples/token_classification.ipynb" ] }, { "cell_type": "code", "metadata": { "id": "MOsHUjgdIrIW", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": 
"01a47b68-0bce-413b-d844-f0825d60e46c" }, "source": [ "! pip install datasets transformers seqeval" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "Collecting datasets\n", " Downloading datasets-1.10.2-py3-none-any.whl (542 kB)\n", "\u001b[K |████████████████████████████████| 542 kB 5.2 MB/s \n", "\u001b[?25hCollecting transformers\n", " Downloading transformers-4.9.0-py3-none-any.whl (2.6 MB)\n", "\u001b[K |████████████████████████████████| 2.6 MB 50.1 MB/s \n", "\u001b[?25hCollecting seqeval\n", " Downloading seqeval-1.2.2.tar.gz (43 kB)\n", "\u001b[K |████████████████████████████████| 43 kB 2.6 MB/s \n", "\u001b[?25hRequirement already satisfied: dill in /usr/local/lib/python3.7/dist-packages (from datasets) (0.3.4)\n", "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.7/dist-packages (from datasets) (1.19.5)\n", "Requirement already satisfied: importlib-metadata in /usr/local/lib/python3.7/dist-packages (from datasets) (4.6.1)\n", "Collecting huggingface-hub<0.1.0\n", " Downloading huggingface_hub-0.0.14-py3-none-any.whl (43 kB)\n", "\u001b[K |████████████████████████████████| 43 kB 1.9 MB/s \n", "\u001b[?25hCollecting xxhash\n", " Downloading xxhash-2.0.2-cp37-cp37m-manylinux2010_x86_64.whl (243 kB)\n", "\u001b[K |████████████████████████████████| 243 kB 60.7 MB/s \n", "\u001b[?25hCollecting fsspec>=2021.05.0\n", " Downloading fsspec-2021.7.0-py3-none-any.whl (118 kB)\n", "\u001b[K |████████████████████████████████| 118 kB 62.4 MB/s \n", "\u001b[?25hRequirement already satisfied: pyarrow!=4.0.0,>=1.0.0 in /usr/local/lib/python3.7/dist-packages (from datasets) (3.0.0)\n", "Requirement already satisfied: packaging in /usr/local/lib/python3.7/dist-packages (from datasets) (21.0)\n", "Requirement already satisfied: pandas in /usr/local/lib/python3.7/dist-packages (from datasets) (1.1.5)\n", "Requirement already satisfied: requests>=2.19.0 in /usr/local/lib/python3.7/dist-packages (from datasets) (2.23.0)\n", 
"Collecting tqdm>=4.42\n", " Downloading tqdm-4.61.2-py2.py3-none-any.whl (76 kB)\n", "\u001b[K |████████████████████████████████| 76 kB 5.2 MB/s \n", "\u001b[?25hRequirement already satisfied: multiprocess in /usr/local/lib/python3.7/dist-packages (from datasets) (0.70.12.2)\n", "Requirement already satisfied: filelock in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<0.1.0->datasets) (3.0.12)\n", "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.7/dist-packages (from huggingface-hub<0.1.0->datasets) (3.7.4.3)\n", "Requirement already satisfied: pyparsing>=2.0.2 in /usr/local/lib/python3.7/dist-packages (from packaging->datasets) (2.4.7)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets) (2021.5.30)\n", "Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets) (1.24.3)\n", "Requirement already satisfied: chardet<4,>=3.0.2 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets) (3.0.4)\n", "Requirement already satisfied: idna<3,>=2.5 in /usr/local/lib/python3.7/dist-packages (from requests>=2.19.0->datasets) (2.10)\n", "Collecting tokenizers<0.11,>=0.10.1\n", " Downloading tokenizers-0.10.3-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (3.3 MB)\n", "\u001b[K |████████████████████████████████| 3.3 MB 62.1 MB/s \n", "\u001b[?25hCollecting pyyaml>=5.1\n", " Downloading PyYAML-5.4.1-cp37-cp37m-manylinux1_x86_64.whl (636 kB)\n", "\u001b[K |████████████████████████████████| 636 kB 57.5 MB/s \n", "\u001b[?25hRequirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.7/dist-packages (from transformers) (2019.12.20)\n", "Collecting huggingface-hub<0.1.0\n", " Downloading huggingface_hub-0.0.12-py3-none-any.whl (37 kB)\n", "Collecting sacremoses\n", " Downloading 
sacremoses-0.0.45-py3-none-any.whl (895 kB)\n", "\u001b[K |████████████████████████████████| 895 kB 69.1 MB/s \n", "\u001b[?25hRequirement already satisfied: scikit-learn>=0.21.3 in /usr/local/lib/python3.7/dist-packages (from seqeval) (0.22.2.post1)\n", "Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.7/dist-packages (from scikit-learn>=0.21.3->seqeval) (1.0.1)\n", "Requirement already satisfied: scipy>=0.17.0 in /usr/local/lib/python3.7/dist-packages (from scikit-learn>=0.21.3->seqeval) (1.4.1)\n", "Requirement already satisfied: zipp>=0.5 in /usr/local/lib/python3.7/dist-packages (from importlib-metadata->datasets) (3.5.0)\n", "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas->datasets) (2.8.1)\n", "Requirement already satisfied: pytz>=2017.2 in /usr/local/lib/python3.7/dist-packages (from pandas->datasets) (2018.9)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7.3->pandas->datasets) (1.15.0)\n", "Requirement already satisfied: click in /usr/local/lib/python3.7/dist-packages (from sacremoses->transformers) (7.1.2)\n", "Building wheels for collected packages: seqeval\n", " Building wheel for seqeval (setup.py) ... 
\u001b[?25l\u001b[?25hdone\n", " Created wheel for seqeval: filename=seqeval-1.2.2-py3-none-any.whl size=16181 sha256=1d930527604ae3c914ac231bfe1781484e6655a68be79a5d227848fa23c12442\n", " Stored in directory: /root/.cache/pip/wheels/05/96/ee/7cac4e74f3b19e3158dce26a20a1c86b3533c43ec72a549fd7\n", "Successfully built seqeval\n", "Installing collected packages: tqdm, xxhash, tokenizers, sacremoses, pyyaml, huggingface-hub, fsspec, transformers, seqeval, datasets\n", " Attempting uninstall: tqdm\n", " Found existing installation: tqdm 4.41.1\n", " Uninstalling tqdm-4.41.1:\n", " Successfully uninstalled tqdm-4.41.1\n", " Attempting uninstall: pyyaml\n", " Found existing installation: PyYAML 3.13\n", " Uninstalling PyYAML-3.13:\n", " Successfully uninstalled PyYAML-3.13\n", "Successfully installed datasets-1.10.2 fsspec-2021.7.0 huggingface-hub-0.0.12 pyyaml-5.4.1 sacremoses-0.0.45 seqeval-1.2.2 tokenizers-0.10.3 tqdm-4.61.2 transformers-4.9.0 xxhash-2.0.2\n" ], "name": "stdout" } ] }, { "cell_type": "markdown", "metadata": { "id": "4HL1yaESsJA9" }, "source": [ "В этом блокноте мы дообучаем модель на задаче классификации отдельных слов, а именно, распознавание именованных сущностей (aka named entity recognition, aka NER). Мы возьмём датасет медицинских сущностей, но в целом пайплайн подходит для любой задачи на выделение сущностей в тексте. 
\n", "\n", "Для скорости мы возьмём маленький BERT для русского языка [rubert-tiny](https://huggingface.co/cointegrated/rubert-tiny); если взять другую, более крупную BERT-подобную модель, качество NER может быть выше, но и время обучения и работы будет дольше \n" ] }, { "cell_type": "markdown", "metadata": { "id": "4RRkXuteIrIh" }, "source": [ "This notebook is built to run on any token classification task, with any model checkpoint from the [Model Hub](https://huggingface.co/models) as long as that model has a version with a token classification head and a fast tokenizer (check on [this table](https://huggingface.co/transformers/index.html#bigtable) if this is the case). It might just need some small adjustments if you decide to use a different dataset than the one used here. Depending on your model and the GPU you are using, you might need to adjust the batch size to avoid out-of-memory errors. Set those two parameters, then the rest of the notebook should run smoothly:" ] }, { "cell_type": "code", "metadata": { "id": "zVvslsfMIrIh" }, "source": [ "model_checkpoint = \"cointegrated/rubert-tiny\"\n", "batch_size = 16" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "whPRbBNbIrIl" }, "source": [ "## Loading the dataset" ] }, { "cell_type": "markdown", "metadata": { "id": "J8mt63rWvkv3" }, "source": [ "Для обучения мы возьмём [Russian Drug Reaction Corpus](https://github.com/cimm-kzn/RuDReC): размеченный корпус русскоязычных отзывов на лекарства. 
\n", "\n", "Загрузим мы его библиотекой corus, потому что это удобно " ] }, { "cell_type": "code", "metadata": { "id": "IreSlFmlIrIm" }, "source": [ "from datasets import load_dataset, load_metric" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "s_AY1ATSIrIq", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "b188000c-2d65-4762-f25f-1b03bc8e49a3" }, "source": [ "!wget https://github.com/cimm-kzn/RuDReC/raw/master/data/rudrec_annotated.json\n", "!pip install corus razdel" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "--2021-07-23 07:39:27-- https://github.com/cimm-kzn/RuDReC/raw/master/data/rudrec_annotated.json\n", "Resolving github.com (github.com)... 140.82.112.4\n", "Connecting to github.com (github.com)|140.82.112.4|:443... connected.\n", "HTTP request sent, awaiting response... 302 Found\n", "Location: https://raw.githubusercontent.com/cimm-kzn/RuDReC/master/data/rudrec_annotated.json [following]\n", "--2021-07-23 07:39:27-- https://raw.githubusercontent.com/cimm-kzn/RuDReC/master/data/rudrec_annotated.json\n", "Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...\n", "Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 1773014 (1.7M) [text/plain]\n", "Saving to: ‘rudrec_annotated.json’\n", "\n", "rudrec_annotated.js 100%[===================>] 1.69M --.-KB/s in 0.06s \n", "\n", "2021-07-23 07:39:27 (27.9 MB/s) - ‘rudrec_annotated.json’ saved [1773014/1773014]\n", "\n", "Collecting corus\n", " Downloading corus-0.9.0-py3-none-any.whl (83 kB)\n", "\u001b[K |████████████████████████████████| 83 kB 1.3 MB/s \n", "\u001b[?25hCollecting razdel\n", " Downloading razdel-0.5.0-py3-none-any.whl (21 kB)\n", "Installing collected packages: razdel, corus\n", "Successfully installed corus-0.9.0 razdel-0.5.0\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "VALH-KBTMfVI", "outputId": "385f3c15-6aa7-4c9a-bd02-8db5c3a593fb" }, "source": [ "from corus import load_rudrec\n", "drugs = list(load_rudrec('rudrec_annotated.json'))\n", "print(len(drugs))" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "4809\n" ], "name": "stdout" } ] }, { "cell_type": "markdown", "metadata": { "id": "fGBywJmAv2NN" }, "source": [ "Пример документа:" ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "ynPlkV5gv4XC", "outputId": "e3fe1c20-6f9d-4921-d56b-71d810de8143" }, "source": [ "drugs[0]" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "RuDReCRecord(\n", " file_name='172744.tsv',\n", " text='нам прописали, так мой ребенок сыпью покрылся, глаза опухли, сверху и снизу на веках высыпала сыпь, ( 8 месяцев сыну)А от виферона такого не было... 
У кого ещё такие побочки, отзовитесь!1 Чем спасались?\\n',\n", " sentence_id=0,\n", " entities=[RuDReCEntity(\n", " entity_id='*[0]_se',\n", " entity_text='виферона',\n", " entity_type='Drugform',\n", " start=122,\n", " end=130,\n", " concept_id='C0021735',\n", " concept_name=nan\n", " ), RuDReCEntity(\n", " entity_id='*[1]',\n", " entity_text='сыпью покрылся',\n", " entity_type='ADR',\n", " start=31,\n", " end=45,\n", " concept_id='C0015230',\n", " concept_name=nan\n", " ), RuDReCEntity(\n", " entity_id='*[2]',\n", " entity_text='глаза опухли',\n", " entity_type='ADR',\n", " start=47,\n", " end=59,\n", " concept_id='C4760994',\n", " concept_name=nan\n", " ), RuDReCEntity(\n", " entity_id='*[3]',\n", " entity_text='на веках высыпала сыпь',\n", " entity_type='ADR',\n", " start=76,\n", " end=98,\n", " concept_id='C0015230',\n", " concept_name=nan\n", " )]\n", ")" ] }, "metadata": { "tags": [] }, "execution_count": 6 } ] }, { "cell_type": "markdown", "metadata": { "id": "iSpV6RLEwI5o" }, "source": [ "Посмотрим, какие сущности есть: лекарства, форма лекарств, класс лекарств, показания к применению, побочки, и прочие болезни/симптомы.\n", "\n", "https://arxiv.org/abs/2004.03659\n", "\n", "* **DRUGNAME** Mentions of the brand name of a drug or product\n", "ingredients/active compounds.\n", "* **DRUGCLASS** Mentions of drug classes such as anti-inflammatory or\n", "cardiovascular.\n", "* **DRUGFORM** Mentions of routes of administration such as tablet\n", "or liquid that describe the physical form in which\n", "medication will be delivered into patient’s organism.\n", "* **DI** Any indication/symptom that specifies the reason for\n", "taking/prescribing the drug.\n", "* **ADR** Mentions of untoward medical events that occur as a\n", "consequence of drug intake and are not associated with\n", "treated symptoms.\n", "* **FINDING** Any DI or ADR that was not directly experienced by the\n", "reporting patient or his/her family members, or related to\n", "medical history/drug 
label, or any disease entities if the\n", "annotator is not clear about type" ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "vo8MIceYNPjx", "outputId": "23d8f04f-f7d3-4a20-b840-a8b915b8ab47" }, "source": [ "from collections import Counter, defaultdict\n", "type2text = defaultdict(Counter)\n", "ents = Counter()\n", "for item in drugs:\n", " for e in item.entities:\n", " ents[e.entity_type] += 1\n", " type2text[e.entity_type][e.entity_text] += 1\n", "\n", "for k, v in ents.most_common():\n", " print(k, v)\n", " print(type2text[k].most_common(3))" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "DI 1401\n", "[('простуды', 64), ('ОРВИ', 47), ('профилактики', 42)]\n", "Drugname 1043\n", "[('Виферон', 33), ('Анаферон', 25), ('Циклоферон', 24)]\n", "Drugform 836\n", "[('таблетки', 154), ('таблеток', 79), ('свечи', 63)]\n", "ADR 720\n", "[('аллергия', 16), ('слабость', 13), ('диарея', 12)]\n", "Drugclass 330\n", "[('противовирусный', 21), ('противовирусное', 18), ('противовирусных', 13)]\n", "Finding 236\n", "[('аллергии', 12), ('температуры', 6), ('сонливости', 5)]\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 52 }, "id": "0Kszaqs8N0Ig", "outputId": "34a697ef-c96d-40bb-c6ed-fe6f2c979ab1" }, "source": [ "drugs[0].text" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "application/vnd.google.colaboratory.intrinsic+json": { "type": "string" }, "text/plain": [ "'нам прописали, так мой ребенок сыпью покрылся, глаза опухли, сверху и снизу на веках высыпала сыпь, ( 8 месяцев сыну)А от виферона такого не было... У кого ещё такие побочки, отзовитесь!1 Чем спасались?\\n'" ] }, "metadata": { "tags": [] }, "execution_count": 8 } ] }, { "cell_type": "markdown", "metadata": { "id": "RzfPtOMoIrIu" }, "source": [ "Напишем функцию, перекладывающую разметку сущностей на уровень слов. 
Будем использовать [IOB](https://en.wikipedia.org/wiki/Inside–outside–beginning_(tagging))-нотацию, чтобы разделять несколько сущностей одного типа, идущих подряд. " ] }, { "cell_type": "code", "metadata": { "id": "Dg9BL4Z_OcjY" }, "source": [ "from razdel import tokenize\n", "\n", "def extract_labels(item):\n", " raw_toks = list(tokenize(item.text))\n", " words = [tok.text for tok in raw_toks]\n", " word_labels = ['O'] * len(raw_toks)\n", " char2word = [None] * len(item.text)\n", " for i, word in enumerate(raw_toks):\n", " char2word[word.start:word.stop] = [i] * len(word.text)\n", "\n", " for e in item.entities:\n", " e_words = sorted({idx for idx in char2word[e.start:e.end] if idx is not None})\n", " word_labels[e_words[0]] = 'B-' + e.entity_type\n", " for idx in e_words[1:]:\n", " word_labels[idx] = 'I-' + e.entity_type\n", "\n", " return {'tokens': words, 'tags': word_labels}" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "PCGwQAadOVA9", "outputId": "cb55c0b3-bdc5-4b5c-feae-c560c38554cd" }, "source": [ "print(extract_labels(drugs[0]))" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "{'tokens': ['нам', 'прописали', ',', 'так', 'мой', 'ребенок', 'сыпью', 'покрылся', ',', 'глаза', 'опухли', ',', 'сверху', 'и', 'снизу', 'на', 'веках', 'высыпала', 'сыпь', ',', '(', '8', 'месяцев', 'сыну', ')', 'А', 'от', 'виферона', 'такого', 'не', 'было', '...', 'У', 'кого', 'ещё', 'такие', 'побочки', ',', 'отзовитесь', '!', '1', 'Чем', 'спасались', '?'], 'tags': ['O', 'O', 'O', 'O', 'O', 'O', 'B-ADR', 'I-ADR', 'O', 'B-ADR', 'I-ADR', 'O', 'O', 'O', 'O', 'B-ADR', 'I-ADR', 'I-ADR', 'I-ADR', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-Drugform', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O']}\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "Chhlmjt8OEgn" }, "source": [ "from sklearn.model_selection import 
train_test_split\n", "ner_data = [extract_labels(item) for item in drugs]\n", "ner_train, ner_test = train_test_split(ner_data, test_size=0.2, random_state=1)" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "yvApziHbyUyR" }, "source": [ "Пример данных" ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 137 }, "id": "17yA19oFRwMk", "outputId": "0f5322ef-f6cf-4099-a34c-d298a3a72f72" }, "source": [ "import pandas as pd\n", "pd.options.display.max_colwidth = 300\n", "pd.DataFrame(ner_train).sample(3)" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
tokenstags
3132[Но, в, 3, месяца, нам, ставили, гипертонус, ручек, и, ножек, .][O, O, O, O, O, O, B-DI, I-DI, I-DI, I-DI, O]
355[У, меня, двое, детей, .][O, O, O, O, O]
3101[Не, спорю, наслышана, о, широте, его, применения, ,, но, нам, он, не, подошел, абсолютно, !][O, O, O, O, O, O, O, O, O, O, O, O, O, O, O]
\n", "
" ], "text/plain": [ " tokens tags\n", "3132 [Но, в, 3, месяца, нам, ставили, гипертонус, ручек, и, ножек, .] [O, O, O, O, O, O, B-DI, I-DI, I-DI, I-DI, O]\n", "355 [У, меня, двое, детей, .] [O, O, O, O, O]\n", "3101 [Не, спорю, наслышана, о, широте, его, применения, ,, но, нам, он, не, подошел, абсолютно, !] [O, O, O, O, O, O, O, O, O, O, O, O, O, O, O]" ] }, "metadata": { "tags": [] }, "execution_count": 12 } ] }, { "cell_type": "markdown", "metadata": { "id": "sE0souTBykq1" }, "source": [ "Соберём все виды меток в список. " ] }, { "cell_type": "code", "metadata": { "id": "16SRNc6csJBC", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "a06bc57e-5b17-4b45-8009-a7cbfc5e7592" }, "source": [ "label_list = sorted({label for item in ner_train for label in item['tags']})\n", "if 'O' in label_list:\n", " label_list.remove('O')\n", " label_list = ['O'] + label_list\n", "label_list" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "['O',\n", " 'B-ADR',\n", " 'B-DI',\n", " 'B-Drugclass',\n", " 'B-Drugform',\n", " 'B-Drugname',\n", " 'B-Finding',\n", " 'I-ADR',\n", " 'I-DI',\n", " 'I-Drugclass',\n", " 'I-Drugform',\n", " 'I-Drugname',\n", " 'I-Finding']" ] }, "metadata": { "tags": [] }, "execution_count": 13 } ] }, { "cell_type": "markdown", "metadata": { "id": "ckjbVWLoyYYf" }, "source": [ "Сложим наши данные в объект [`DatasetDict`](https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasetdict), нативный для huggingface." 
] }, { "cell_type": "code", "metadata": { "id": "4E3yy6wmUp-z" }, "source": [ "from datasets import Dataset, DatasetDict" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "3YavIR4eU5ZY", "outputId": "8b2ae069-88ac-405a-83a8-ff4a73d7d215" }, "source": [ "ner_data = DatasetDict({\n", " 'train': Dataset.from_pandas(pd.DataFrame(ner_train)),\n", " 'test': Dataset.from_pandas(pd.DataFrame(ner_test))\n", "})\n", "ner_data" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "DatasetDict({\n", " train: Dataset({\n", " features: ['tokens', 'tags'],\n", " num_rows: 3847\n", " })\n", " test: Dataset({\n", " features: ['tokens', 'tags'],\n", " num_rows: 962\n", " })\n", "})" ] }, "metadata": { "tags": [] }, "execution_count": 15 } ] }, { "cell_type": "markdown", "metadata": { "id": "n9qywopnIrJH" }, "source": [ "## Preprocessing the data" ] }, { "cell_type": "markdown", "metadata": { "id": "YVx71GdAIrJH" }, "source": [ "Before we can feed those texts to our model, we need to preprocess them. This is done by a 🤗 Transformers `Tokenizer` which will (as the name indicates) tokenize the inputs (including converting the tokens to their corresponding IDs in the pretrained vocabulary) and put it in a format the model expects, as well as generate the other inputs that model requires.\n", "\n", "To do all of this, we instantiate our tokenizer with the `AutoTokenizer.from_pretrained` method, which will ensure:\n", "\n", "- we get a tokenizer that corresponds to the model architecture we want to use,\n", "- we download the vocabulary used when pretraining this specific checkpoint.\n", "\n", "That vocabulary will be cached, so it's not downloaded again the next time we run the cell." 
] }, { "cell_type": "code", "metadata": { "id": "eXNLu_-nIrJI", "colab": { "base_uri": "https://localhost:8080/", "height": 177, "referenced_widgets": [ "38effe6811e0445ea6a06fbf62322cb2", "9c3e627f3f214708a0eada8855e345e1", "4b4eaf184a9a482798ac08ff5febafb9", "c3566754043543218423794f629214d6", "b92ff60477c043e0b2bdaea88a70295b", "5e07fb58967a4d03931682646417f324", "c8847e2fd0654eabb62848c927685ba4", "bb59a31387e84bc18937ca0aafb36230", "7acb711b2f994c0c8c292f7735ccfd15", "93c12e9ea0294f70b4dca310239b55e4", "44595961fd42457496803732f50e813b", "97db5fbd0b134002a15f1e89b2e2a871", "4ac3dbe72f9b4dd2b27d91630736fbcc", "937d10a0f080492ab64efbb53f6ebe83", "3dab395a7da545838fb6c54fc19420cc", "712ed73184ec4551a6891862b3295b22", "5281ff75b7144218a6218c65d8789f48", "bedec2ca0adc4239b35f94a23a9b3cdd", "8263aecd8ead4ceabf6112e536ac6dbd", "e7f42df803cc4ed6ae43ead43e283ed8", "57770139538d40c38c68112f622993f3", "c3caeafc16f44882938d7c1dcb56cb18", "0389bbf4dde74c4db0117a6f66c8808f", "bb94580fba5140078f7a4ac289308e6f", "ff1d724506ef4ab1a73c4a6635a95cdb", "6568a4b7ebba4b66ae324cfb1e4c6c56", "37f3d4726084402ba1ff26773197b415", "6f02d9d6f80249e4949123b465d56584", "f5e2e39b8c344f81b40f43c7645b6fae", "2f8e9faf6afe4b278a4889a13e29fd68", "ff12a26f63e643fbb301dffe15f375cf", "2fd0ded25025401d917bc2624b23a5ce", "fd461fa9ee1249dbb49e10bf7201f9d9", "91e9fdb905864efa8d42ee7cb3680e08", "d8aa99a5f7cb42b9b3ca28edb0b0a007", "0f4c5d11ce814b83bb85389b3c5a4c5f", "62ae82032a8a403b989ee8e0ab06f58a", "89ccf6c81454461c9a94ee0b9820d4a9", "c76403b23ddd4fd5a644e63e21da8358", "303a6382c4ff45439a40c47e969306da", "24bff1add1804f5ea46b5edaf4ca7428", "6e2ff1b9214049188ac0587fb33b5352", "5f25f5e5ad604408bcb53d0b70f67f20", "76260400334d4c4f8985e69b4800ae28", "21fdf7eb6fb94da0bc9639b3f4ea7f00", "20b1444148dc40f89b107e30dbcf6c0b", "36dc0d7a19c04a47a06618e187ee894a", "791e009541a64d749330e6123ca7d87f", "06c63accbd2a4903b762ed21545bfbbe", "07fee35962004c8996c8acef923292eb", "b3332e5f66ad4c6c830f28bc290cd4bd", 
"b831ba8c276b4a1bb0ef7ae16a7a8fc9", "7eff20962dfe422b9523c4e74f5372aa", "663c10e13a0e47f7b115ad50bd5b3965", "997504803ac5445588c07cf97049d14a" ] }, "outputId": "61183128-8efa-44c0-83e5-1bddd1d0fc06" }, "source": [ "from transformers import AutoTokenizer\n", " \n", "tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)" ], "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "38effe6811e0445ea6a06fbf62322cb2", "version_minor": 0, "version_major": 2 }, "text/plain": [ "Downloading: 0%| | 0.00/341 [00:00\n", " \n", " \n", " [61/61 00:00]\n", " \n", " " ], "text/plain": [ "" ] }, "metadata": { "tags": [] } }, { "output_type": "execute_result", "data": { "text/plain": [ "{'eval_accuracy': 0.07571226846083562,\n", " 'eval_f1': 0.03137110167927662,\n", " 'eval_loss': 2.604278326034546,\n", " 'eval_precision': 0.018480269594521145,\n", " 'eval_recall': 0.10372178157413056,\n", " 'eval_runtime': 1.5067,\n", " 'eval_samples_per_second': 638.492,\n", " 'eval_steps_per_second': 40.486}" ] }, "metadata": { "tags": [] }, "execution_count": 35 } ] }, { "cell_type": "markdown", "metadata": { "id": "a-xw5JvKzyrf" }, "source": [ "В начале обучения заморозим все параметры в модели, кроме последнего слоя, и посмотрим, насколько хорошо она обучится." 
] }, { "cell_type": "code", "metadata": { "id": "lzwwl_YQWKxq" }, "source": [ "for param in model.bert.parameters():\n", " param.requires_grad = False" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "EhRisAHxWZRG", "outputId": "95232006-ea19-46f8-a893-4fdfa44805f1" }, "source": [ "for name, param in model.named_parameters():\n", " if param.requires_grad:\n", " print(name)\n", " print(param)" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "classifier.weight\n", "Parameter containing:\n", "tensor([[-5.3295e-02, 8.1591e-05, -1.4091e-02, ..., 9.4435e-03,\n", " 2.6371e-02, -2.7459e-02],\n", " [-1.4154e-02, 1.8980e-02, -6.4149e-03, ..., -3.0063e-02,\n", " -8.0335e-03, -1.3474e-02],\n", " [ 3.9226e-03, -1.7339e-03, -2.4043e-03, ..., 1.1911e-02,\n", " -6.8623e-03, -3.6764e-02],\n", " ...,\n", " [ 2.9699e-02, -2.5830e-02, 2.9956e-03, ..., 2.0724e-02,\n", " 2.6304e-02, -1.3127e-04],\n", " [-2.8258e-02, 1.9521e-03, -1.2629e-02, ..., -2.4292e-02,\n", " -1.9133e-02, 3.5226e-02],\n", " [ 4.8563e-03, -3.9019e-02, 2.2573e-02, ..., 2.3094e-02,\n", " -5.4334e-03, -3.1281e-02]], device='cuda:0', requires_grad=True)\n", "classifier.bias\n", "Parameter containing:\n", "tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], device='cuda:0',\n", " requires_grad=True)\n" ], "name": "stdout" } ] }, { "cell_type": "markdown", "metadata": { "id": "CdzABDVcIrJg" }, "source": [ "We can now finetune our model by just calling the `train` method:" ] }, { "cell_type": "code", "metadata": { "id": "nsuTXCjMeYHE" }, "source": [ "import logging\n", "from transformers.trainer import logger as noisy_logger\n", "noisy_logger.setLevel(logging.WARNING)" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "yGz3c_A_sJBO", "colab": { "base_uri": "https://localhost:8080/", "height": 429 }, "outputId": "187b4d00-27fc-464b-da73-9fd47e2dc862" }, 
"source": [ "trainer.train()" ], "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "text/html": [ "\n", "
\n", " \n", " \n", " [2410/2410 00:31, Epoch 10/10]\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
EpochTraining LossValidation LossPrecisionRecallF1Accuracy
1No log2.0348660.0321570.0594870.0417470.630991
2No log1.5944690.0421050.0048810.0087480.815724
32.0431001.2824390.0526320.0003050.0006070.826314
42.0431001.0791690.0000000.0000000.0000000.826854
51.2672000.9545400.0000000.0000000.0000000.826896
61.2672000.8806440.0000000.0000000.0000000.826896
70.9325000.8378820.0000000.0000000.0000000.826896
80.9325000.8136640.0000000.0000000.0000000.826896
90.8087000.8011210.0000000.0000000.0000000.826896
100.8087000.7972580.0000000.0000000.0000000.826896

" ], "text/plain": [ "" ] }, "metadata": { "tags": [] } }, { "output_type": "display_data", "data": { "text/html": [ "\n", "

\n", " \n", " \n", " [61/61 00:53]\n", "
\n", " " ], "text/plain": [ "" ] }, "metadata": { "tags": [] } }, { "output_type": "execute_result", "data": { "text/plain": [ "TrainOutput(global_step=2410, training_loss=1.181212188594074, metrics={'train_runtime': 31.5523, 'train_samples_per_second': 1219.246, 'train_steps_per_second': 76.381, 'total_flos': 35752217175750.0, 'train_loss': 1.181212188594074, 'epoch': 10.0})" ] }, "metadata": { "tags": [] }, "execution_count": 39 } ] }, { "cell_type": "markdown", "metadata": { "id": "H14j1R3cbDPO" }, "source": [ "Модель недообучилась: похоже, что нужно обучить больше слоёв. Разморозим их все (но, возможно, более правильно было бы разморозить лишь несколько верхних), и поучимся ещё эпох 20." ] }, { "cell_type": "code", "metadata": { "id": "65soVR9sbE77" }, "source": [ "# разморозка\n", "for param in model.parameters():\n", " param.requires_grad = True" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "u-3sfj5ocug0", "outputId": "32b7bd21-4999-4f15-e1b8-cfd60dcdf4f8" }, "source": [ "args = TrainingArguments(\n", " \"ner\",\n", " evaluation_strategy = \"epoch\",\n", " learning_rate=1e-5,\n", " per_device_train_batch_size=batch_size,\n", " per_device_eval_batch_size=batch_size,\n", " num_train_epochs=20,\n", " weight_decay=0.01,\n", " save_strategy='no',\n", " report_to='none',\n", ")" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "PyTorch: setting up devices\n" ], "name": "stderr" } ] }, { "cell_type": "code", "metadata": { "id": "wGW0r33pdLOy" }, "source": [ "trainer = Trainer(\n", " model,\n", " args,\n", " train_dataset=tokenized_datasets[\"train\"],\n", " eval_dataset=tokenized_datasets[\"test\"],\n", " data_collator=data_collator,\n", " tokenizer=tokenizer,\n", " compute_metrics=compute_metrics\n", ")" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 
706 }, "id": "C5nZBs-BbFRq", "outputId": "61effef3-dff8-4296-f3d2-00dedd88011f" }, "source": [ "trainer.train()" ], "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "text/html": [ "\n", "
\n", " \n", " \n", " [4820/4820 01:51, Epoch 20/20]\n", "
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
EpochTraining LossValidation LossPrecisionRecallF1Accuracy
1No log0.5841390.7035900.2092740.3225960.851981
2No log0.5165970.6038920.3218430.4199000.863070
30.5711000.4744320.6090950.3840760.4710940.871252
40.5711000.4464050.6244010.4371570.5142650.878769
50.4461000.4239400.6191020.4963390.5509650.885414
60.4461000.4052710.6202400.5366080.5754010.889733
70.3877000.3916460.6304870.5564370.5911520.893222
80.3877000.3814040.6067380.5878580.5971490.894468
90.3490000.3746200.6037740.6150090.6093400.895922
100.3490000.3648990.6212630.6150090.6181200.898787
110.3200000.3568650.6389780.6101280.6242200.900573
120.3200000.3537240.6210750.6275170.6242790.900365
130.3042000.3510880.6128750.6418550.6270300.900947
140.3042000.3448750.6356140.6348380.6352260.903273
150.2903000.3430570.6322290.6403290.6362530.903107
160.2903000.3408330.6373230.6449050.6410920.903896
170.2789000.3381960.6475660.6412450.6443900.904892
180.2789000.3375070.6389470.6516170.6452200.905100
190.2671000.3369350.6375560.6525320.6449570.904975
200.2671000.3367560.6370940.6528370.6448700.905017

" ], "text/plain": [ "" ] }, "metadata": { "tags": [] } }, { "output_type": "execute_result", "data": { "text/plain": [ "TrainOutput(global_step=4820, training_loss=0.35207317083208395, metrics={'train_runtime': 111.6227, 'train_samples_per_second': 689.286, 'train_steps_per_second': 43.181, 'total_flos': 71548216106580.0, 'train_loss': 0.35207317083208395, 'epoch': 20.0})" ] }, "metadata": { "tags": [] }, "execution_count": 43 } ] }, { "cell_type": "markdown", "metadata": { "id": "CKASz-2vIrJi" }, "source": [ "The `evaluate` method allows you to evaluate again on the evaluation dataset or on another dataset:" ] }, { "cell_type": "code", "metadata": { "id": "UOUcBkX8IrJi", "colab": { "base_uri": "https://localhost:8080/", "height": 188 }, "outputId": "c5772a91-7302-4f14-da69-c99eb281dcd1" }, "source": [ "trainer.evaluate()" ], "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "text/html": [ "\n", "

\n", " \n", " \n", " [61/61 00:00]\n", "
\n", " " ], "text/plain": [ "" ] }, "metadata": { "tags": [] } }, { "output_type": "execute_result", "data": { "text/plain": [ "{'epoch': 20.0,\n", " 'eval_accuracy': 0.9050170279923582,\n", " 'eval_f1': 0.6448696700316409,\n", " 'eval_loss': 0.3367559015750885,\n", " 'eval_precision': 0.6370943733253944,\n", " 'eval_recall': 0.652837095790116,\n", " 'eval_runtime': 1.1185,\n", " 'eval_samples_per_second': 860.049,\n", " 'eval_steps_per_second': 54.535}" ] }, "metadata": { "tags": [] }, "execution_count": 44 } ] }, { "cell_type": "markdown", "metadata": { "id": "BaMhVjZ-sJBO" }, "source": [ "To get the precision/recall/f1 computed for each category now that we have finished training, we can apply the same function as before on the result of the `predict` method:" ] }, { "cell_type": "code", "metadata": { "id": "wm8MsZ3tsJBO", "colab": { "base_uri": "https://localhost:8080/", "height": 509 }, "outputId": "6fdbd471-7987-47c3-cbd0-83a82bd45ce3" }, "source": [ "predictions, labels, _ = trainer.predict(tokenized_datasets[\"test\"])\n", "predictions = np.argmax(predictions, axis=2)\n", "\n", "# Remove ignored index (special tokens)\n", "true_predictions = [\n", " [label_list[p] for (p, l) in zip(prediction, label) if l != -100]\n", " for prediction, label in zip(predictions, labels)\n", "]\n", "true_labels = [\n", " [label_list[l] for (p, l) in zip(prediction, label) if l != -100]\n", " for prediction, label in zip(predictions, labels)\n", "]\n", "\n", "results = metric.compute(predictions=true_predictions, references=true_labels)\n", "results" ], "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "text/html": [ "\n", "
\n", " \n", " \n", " [61/61 00:08]\n", "
\n", " " ], "text/plain": [ "" ] }, "metadata": { "tags": [] } }, { "output_type": "stream", "text": [ "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/v1.py:57: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n", " _warn_prf(average, modifier, msg_start, len(result))\n" ], "name": "stderr" }, { "output_type": "execute_result", "data": { "text/plain": [ "{'ADR': {'f1': 0.30279898218829515,\n", " 'number': 446,\n", " 'precision': 0.35,\n", " 'recall': 0.26681614349775784},\n", " 'DI': {'f1': 0.493963782696177,\n", " 'number': 821,\n", " 'precision': 0.4207369323050557,\n", " 'recall': 0.5980511571254568},\n", " 'Drugclass': {'f1': 0.7868852459016393,\n", " 'number': 336,\n", " 'precision': 0.7880597014925373,\n", " 'recall': 0.7857142857142857},\n", " 'Drugform': {'f1': 0.7922794117647058,\n", " 'number': 565,\n", " 'precision': 0.8240917782026769,\n", " 'recall': 0.7628318584070797},\n", " 'Drugname': {'f1': 0.8734309623430963,\n", " 'number': 918,\n", " 'precision': 0.8400402414486922,\n", " 'recall': 0.9095860566448801},\n", " 'Finding': {'f1': 0.0, 'number': 192, 'precision': 0.0, 'recall': 0.0},\n", " 'overall_accuracy': 0.9050170279923582,\n", " 'overall_f1': 0.6448696700316409,\n", " 'overall_precision': 0.6370943733253944,\n", " 'overall_recall': 0.652837095790116}" ] }, "metadata": { "tags": [] }, "execution_count": 45 } ] }, { "cell_type": "code", "metadata": { "id": "nI18Xeda7X8a" }, "source": [ "from sklearn.metrics import confusion_matrix\n", "import pandas as pd" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 435 }, "id": "Yz9BkfrO7bg6", "outputId": "d6ce002d-0803-4320-8711-ec33bdc9c40d" }, "source": [ "cm = pd.DataFrame(\n", " confusion_matrix(sum(true_labels, []), sum(true_predictions, []), labels=label_list),\n", " 
index=label_list,\n", " columns=label_list\n", ")\n", "cm" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
OB-ADRB-DIB-DrugclassB-DrugformB-DrugnameB-FindingI-ADRI-DII-DrugclassI-DrugformI-DrugnameI-Finding
O1949429175356071020260000
B-ADR1591351338200450000
B-DI24221525017100330000
B-Drugclass50117264040000000
B-Drugform984111432170110000
B-Drugname44116188480000000
B-Finding5632875330150000
I-ADR1805140010047300000
I-DI236171021001011460000
I-Drugclass0004000000000
I-Drugform3000010000000
I-Drugname190000390000000
I-Finding25767000250000
\n", "
" ], "text/plain": [ " O B-ADR B-DI ... I-Drugform I-Drugname I-Finding\n", "O 19494 29 175 ... 0 0 0\n", "B-ADR 159 135 133 ... 0 0 0\n", "B-DI 242 21 525 ... 0 0 0\n", "B-Drugclass 50 1 17 ... 0 0 0\n", "B-Drugform 98 4 11 ... 0 0 0\n", "B-Drugname 44 1 16 ... 0 0 0\n", "B-Finding 56 32 87 ... 0 0 0\n", "I-ADR 180 51 40 ... 0 0 0\n", "I-DI 236 17 102 ... 0 0 0\n", "I-Drugclass 0 0 0 ... 0 0 0\n", "I-Drugform 3 0 0 ... 0 0 0\n", "I-Drugname 19 0 0 ... 0 0 0\n", "I-Finding 25 7 6 ... 0 0 0\n", "\n", "[13 rows x 13 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 47 } ] }, { "cell_type": "code", "metadata": { "id": "cA0jWZwjVbI7", "colab": { "base_uri": "https://localhost:8080/" }, "outputId": "4161d7fe-c5e3-4f56-b9d4-52830e14da06" }, "source": [ "model.save_pretrained('ner_bert.bin')\n", "tokenizer.save_pretrained('ner_bert.bin')" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "Configuration saved in ner_bert.bin/config.json\n", "Model weights saved in ner_bert.bin/pytorch_model.bin\n", "tokenizer config file saved in ner_bert.bin/tokenizer_config.json\n", "Special tokens file saved in ner_bert.bin/special_tokens_map.json\n" ], "name": "stderr" }, { "output_type": "execute_result", "data": { "text/plain": [ "('ner_bert.bin/tokenizer_config.json',\n", " 'ner_bert.bin/special_tokens_map.json',\n", " 'ner_bert.bin/vocab.txt',\n", " 'ner_bert.bin/added_tokens.json',\n", " 'ner_bert.bin/tokenizer.json')" ] }, "metadata": { "tags": [] }, "execution_count": 48 } ] }, { "cell_type": "markdown", "metadata": { "id": "C5yv9hItsJBP" }, "source": [ "# Применение модели" ] }, { "cell_type": "code", "metadata": { "id": "p0JHjRKmuv_m" }, "source": [ "import torch" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 35 }, "id": "Kp59uTtXZKT4", "outputId": "6ed0bcf0-7de7-4b93-936b-0562ef0505b4" }, "source": [ "text = ' 
'.join(ner_train[8]['tokens'])\n", "text = ' '.join(ner_test[4]['tokens'])\n", "text" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "application/vnd.google.colaboratory.intrinsic+json": { "type": "string" }, "text/plain": [ "'Охотно применяю его при борьбе с насморком , что в моем случае явление очень частое .'" ] }, "metadata": { "tags": [] }, "execution_count": 50 } ] }, { "cell_type": "code", "metadata": { "id": "6h2hiUylZVmF" }, "source": [ "import torch" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Yt8EXbDuuB1U", "outputId": "bf2699a1-c42f-42bb-b00b-8adb7aca1edd" }, "source": [ "tokens = tokenizer(text, return_tensors='pt')\n", "tokens = {k: v.to(model.device) for k, v in tokens.items()}\n", "\n", "with torch.no_grad():\n", " pred = model(**tokens)\n", "pred.logits.shape" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "torch.Size([1, 29, 13])" ] }, "metadata": { "tags": [] }, "execution_count": 52 } ] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "2GQPlbnyuu6H", "outputId": "e43272b6-3e22-44bb-ec7b-15a2acf570d3" }, "source": [ "indices = pred.logits.argmax(dim=-1)[0].cpu().numpy()\n", "token_text = tokenizer.convert_ids_to_tokens(tokens['input_ids'][0])\n", "for t, idx in zip(token_text, indices):\n", " print(f'{t:15s} {label_list[idx]:10s}')" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "[CLS] O \n", "О O \n", "##хо O \n", "##тно O \n", "при O \n", "##мен O \n", "##я O \n", "##ю O \n", "его O \n", "при O \n", "борьбе O \n", "с O \n", "нас B-DI \n", "##мор B-DI \n", "##ком B-DI \n", ", O \n", "что O \n", "в O \n", "м O \n", "##ое O \n", "##м O \n", "случае O \n", "я O \n", "##вление O \n", "очень O \n", "часто O \n", "##е O \n", ". 
O \n", "[SEP] O \n" ], "name": "stdout" } ] }, { "cell_type": "markdown", "metadata": { "id": "tBSq9enuwJ_V" }, "source": [ "Более простое применение модели: пайплайн от huggingface" ] }, { "cell_type": "code", "metadata": { "id": "lnrAoy6b8swA" }, "source": [ "from transformers import pipeline" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "uowfISMu8v1k" }, "source": [ "pipe = pipeline(model=model, tokenizer=tokenizer, task='ner', aggregation_strategy='average', device=0)" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "1WjXMXCv9Nde", "outputId": "e2b4f6d6-6153-4bbd-915d-a3bc4d148895" }, "source": [ "print(text)\n", "print(pipe(text))" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "Охотно применяю его при борьбе с насморком , что в моем случае явление очень частое .\n", "[{'entity_group': 'DI', 'score': 0.73669535, 'word': 'насморком', 'start': 33, 'end': 42}]\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "panjTvbH9PJL" }, "source": [ "" ], "execution_count": null, "outputs": [] } ] }