{ "cells": [ { "cell_type": "markdown", "id": "07463a5f", "metadata": {}, "source": [ "# サポートベクトルマシン\n", "\n", "サポートベクトルマシン全般については次を確認してください。\n", "[https://scikit-learn.org/stable/modules/svm.html](https://scikit-learn.org/stable/modules/svm.html)" ] }, { "cell_type": "markdown", "id": "22ae72e6", "metadata": {}, "source": [ "**データとモジュールのロード**" ] }, { "cell_type": "code", "execution_count": 1, "id": "4f42c570", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "from sklearn import model_selection\n", "\n", "data = pd.read_csv(\"input/pn_same_judge_preprocessed.csv\")\n", "train, test = model_selection.train_test_split(data, test_size=0.1, random_state=0)" ] }, { "cell_type": "code", "execution_count": 2, "id": "55fdf55a", "metadata": {}, "outputs": [], "source": [ "from sklearn.pipeline import Pipeline\n", "from sklearn.feature_extraction.text import TfidfVectorizer\n", "from sklearn.metrics import ConfusionMatrixDisplay\n", "from sklearn.metrics import PrecisionRecallDisplay" ] }, { "cell_type": "markdown", "id": "310d666d", "metadata": {}, "source": [ "## SVC" ] }, { "cell_type": "markdown", "id": "ea30df74", "metadata": {}, "source": [ "[sklearn.svm.LinearSVC](https://scikit-learn.org/stable/modules/generated/sklearn.svm.LinearSVC.html)\n", "を使います。" ] }, { "cell_type": "code", "execution_count": 3, "id": "79cc90a0", "metadata": {}, "outputs": [], "source": [ "from sklearn.svm import LinearSVC" ] }, { "cell_type": "code", "execution_count": 4, "id": "bb3dbb21", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
Pipeline(steps=[('vect',\n",
       "                 TfidfVectorizer(tokenizer=<method 'split' of 'str' objects>)),\n",
       "                ('clf', LinearSVC())])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "Pipeline(steps=[('vect',\n", " TfidfVectorizer(tokenizer=)),\n", " ('clf', LinearSVC())])" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Baseline model: whitespace-tokenized TF-IDF features fed to a linear SVM.\n", "# token_pattern=None: the custom tokenizer replaces the regex, so the unused default pattern is dropped (avoids the unused-parameter warning).\n", "# random_state=0: LinearSVC shuffles the data in its dual solver; seeding keeps reruns reproducible.\n", "pipe = Pipeline([\n", "    (\"vect\", TfidfVectorizer(tokenizer=str.split, token_pattern=None)),\n", "    (\"clf\", LinearSVC(random_state=0)),\n", "])\n", "\n", "pipe.fit(train[\"tokens\"], train[\"label_num\"])" ] }, { "cell_type": "code", "execution_count": 5, "id": "441429d6", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "pred = pipe.predict(test[\"tokens\"])\n", "ConfusionMatrixDisplay.from_predictions(y_true=test[\"label_num\"], y_pred=pred)" ] }, { "cell_type": "code", "execution_count": 6, "id": "dd63ab0c", "metadata": {}, "outputs": [], "source": [ "# SVC では predict_proba ではなく decision_function を使います\n", "score = pipe.decision_function(test[\"tokens\"])" ] }, { "cell_type": "code", "execution_count": 7, "id": "2b58a9c2", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEGCAYAAABo25JHAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAAkmElEQVR4nO3de7xVZb3v8c9XIMG8C+6XcpElYIWoa+tS0vSEZm68Ul5CSI1SyUrr2O5iZ++81tmWaWd7cm8lRS0NdFMJGsX2gkilyMIQZZmKyJaFnEIw0Y038Hf+GGPhXJO11hwL5mXNOb/v12u9mGOMZ4zxG6Dzt57LeB5FBGZmVr+2q3QAZmZWWU4EZmZ1zonAzKzOORGYmdU5JwIzszrXu9IBdFf//v1j6NChlQ7DzKyqLFq06JWIGNDRsapLBEOHDqW5ubnSYZiZVRVJ/9XZMTcNmZnVOScCM7M650RgZlbnnAjMzOqcE4GZWZ0rWSKQNFXSXyU93clxSbpe0jJJSyQdXKpYzMysc6WsEdwGjO3i+PHAiPRnMvDvJYzFzMw6UbL3CCLiEUlDuygyDvhZJPNgPyZpV0l7RcTqUsRzxb1LaXl5fSkuXXPGNQ5k4ughlQ7DzMqkkn0EA4GVOdut6b4tSJosqVlS85o1a8oSXL1qWb2emYtXVToMMyujqnizOCKmAFMAmpqatmolnctO3r+oMdWq8Tc9WukQzKzMKlkjWAUMztkelO4zM7MyqmQimAWck44e+ijwWqn6B8zMrHMlaxqSNA0YA/SX1ApcBvQBiIgbgdnACcAyYAPw+VLFYmZmnSvlqKEJBY4H8JVS3d/MzLLxm8VmZnXOicDMrM45EZiZ1TknAjOzOudEYGZW56rizWKrTr9Y8NIW01V4HiOznseJwLqtoy/4jix4cR0Aoxt2B5J5jAAnArMexonACsr/4s//gu/M6Ibd29UAPI+RWc/kRGDttH3J535p53/x53/Bm1l1cyKwgvzFb1bbnAisnSOH9wfgjvNGVzgSMysXJwJrxwnArP44EVhV8tBUs+JxIrAexUNTzcrPicAqykNTzSrPicDKxkNTzXomJwKrKH/xm1VeSROBpLHAvwK9gJsj4uq84/sAU4EBwDrgrIhoLWVMVjkemmrWM5VyzeJewA3AJ4FWYKGkWRHRklPsR8DPIuJ2SccA/wKcXaqYrLKcAMx6plLWCA4DlkXEcgBJ04FxQG4iGAl8Pf08F7inhPFYDeuo/wE8pNQsi1ImgoHAypztViD/V8IngVNJmo8+DewkaY+IWJtbSNJkYDLAkCH+n9qyqfSQ0s6Gwjo5
WU9T6c7ibwA/kTQJeARYBWzKLxQRU4ApAE1NTVHOAK06dNT/UM4hpR196Xc0FLbSycmsI6VMBKuAwTnbg9J9m0XEyyQ1AiTtCJwWEX8rYUxWo0rZ/5DlJbeOvvQ7GhHl9x2sJyplIlgIjJDUQJIAzgQm5haQ1B9YFxHvAd8hGUFkVlFb85Kbh8FaNStZIoiIjZIuBOaQDB+dGhFLJV0JNEfELGAM8C+SgqRp6CulisesI1madEr5JZ91Sg1w34KVTkn7CCJiNjA7b9+lOZ9nADNKGYNZm6zt+KX84s8f3ZR1So1i9S1kTTyVTnxOeuVV6c5is5LJ8qVb6SadrPffmr6FrIkvXzE7tLemmc0d6uXnRGB1o9Jf+lC8t6uL2YGdL0vS2dpZYot1/85iqPS/b7XKlAgk7Ql8DNgbeBN4mqSd/70Sxma2TXrilBZbG8vWNCltbeLr7OW8jsp0d5bYYt2/oxhck9h6XSYCSUcDlwC7A38C/gr0BT4FDJM0A7g2ItaXOE6zbutJCaDYKl27qfT9O4qho8ThPolsCtUITgDOj4iX8g9I6g2cRDKX0C9LEJuZpcpZu6l0TWpr759lmvOOuCYBiqiuF3Wbmpqiubm50mGYWQ8z9JLfAFt+6Wftk7jri4eXLrgeQNKiiGjq6NhWdxZL+nxE3Lr1YZmZFU+lazLVbFtGDV0BOBGYWY/gBLD1CnUWL+nsEPB3xQ/HzMzKrVCN4O+AfwBezdsv4I8licjMrIy8lkXhRHAfsGNELM4/IOnhUgRkZlZp9TaSqMtEEBHndnFsYmfHzMyqRaXXsugJPMWEmdU1dzLDdpUOwMzMKsuJwMyszjkRmJnVucyJQNKUrrbNzKw6dadGcFOB7S1IGivpWUnLJF3SwfEhkuZK+pOkJZJO6EY8ZmZWBJkTQUQs6mo7n6RewA3A8cBIYIKkkXnF/hm4OyL+nmRx+3/LGo+ZmRVHoSkm7gU6nZ40Ik7p4vTDgGURsTy91nRgHNCSewlg5/TzLsDLGWI2M7MiKvQewY+24doDgZU5261A/oDdy4H/lHQR8EHg2G24n5mZbYVCbxbPa/ssqR8wJCKeLeL9JwC3RcS1kg4Hfi5pVP4SmJImA5MBhgypj1e+zczKJVMfgaSTgcXA79LtRkmzCpy2Chicsz0o3ZfrXOBugIh4lGQZzP75F4qIKRHRFBFNAwYMyBKymZlllLWz+HKSNv+/AaST0DUUOGchMEJSg6QPkHQG5yePl4BPAEj6CEkiWJMxJjMzK4KsieDdiHgtb1+Xa1xGxEbgQmAO8AzJ6KClkq6U1NbJ/I/A+ZKeBKYBk6La1s40M6tyWSedWyppItBL0gjgq2RYjyAiZgOz8/ZdmvO5BfhY9nDNzKzYstYILgL2B94m+c19PfA/SxSTmZmVUaYaQURsAP5J0g+SzXi9tGGZmVm5ZB01dKikp4AlwFOSnpR0SGlDMzOzcsjaR3AL8OWImA8g6UjgVuDAUgVmZmblkbWPYFNbEgCIiN8DG0sTkpmZlVOhuYYOTj/Ok3QTSUdxAOOBh0sbmpmZlUOhpqFr87Yvy/ns8f5mZjWg0FxDR5crEDMzq4ysncVIOpHkXYK+bfsi4spSBGVmZuWTKRFIuhHYATgauBk4HXi8hHGZmfUYv1jwEjMX58+ZCeMaBzJxdPXPiJy1RnBERBwoaUlEXCHpWuC3pQzMzKxSFry4DoDxNz3abnt0w+6by7SsXg9QV4ngzfTPDZL2BtYCe5UmJDOznmV0w+5b/PbfliRqQdZEcJ+kXYFrgCdIRgzdXKqgzMwq6cjhybIod5yXv6hibco619BV6cdfSroP6NvBtNRmZjWhXhJAm0IvlJ3axTEi4lfFD8nMzMqpUI3g5C6OBeBEYGZW5Qq9UPb5cgViZmaVkXXSOTMzq1ElTQSSxkp6VtIySZd0cPzHkhanP89J+lsp4zEzsy1lnmKiuyT1Am4APgm0AgslzUrXKQYgIi7OKX8R
8PelisfMzDqWdYWyHSR9V9JP0+0Rkk4qcNphwLKIWB4R7wDTgXFdlJ9AMs21mZmVUdamoVtJFq4/PN1eBXyvwDkDgZU5263pvi1I2gdoAB7q5PhkSc2SmtesWZMxZDMzyyJrIhgWET8E3oXNi9mriHGcCcyIiE0dHYyIKRHRFBFNAwYMKOJtzcwsayJ4R1I/0sVoJA0jqSF0ZRUwOGd7ULqvI2fiZiEzs4rImgguB34HDJZ0J/Ag8K0C5ywERkhqkPQBki/7WfmFJH0Y2A2onRmczMyqSNa5hv5T0iLgoyRNQl+LiFcKnLNR0oXAHKAXMDUilkq6EmiOiLakcCYwPSK89KWZWQVkXZjmXuAXwKyI+O+sF4+I2cDsvH2X5m1fnvV6ZmZWfFmbhn4EHAW0SJoh6XRJfQudZGZmPV/WpqF5wLz0JbFjgPOBqcDOJYzNzMzKoDuL1/cjmY10PHAwcHupgjIzs/LJ2kdwN8mbwr8DfgLMi4j3ShmYmZmVR9YawS3AhM5e+DIzqzf5C9y3yV/buBoUWqHsmIh4CPggME5q/zKxVygzM3tfy+r1ALWVCICPk8z/09FKZV6hzMzqVkcL3OfXDqpFoRXKLks/XhkRL+Yek9RQsqjMzHq4WlrgPut7BL/sYN+MYgZiZmaVUaiP4MPA/sAukk7NObQz4BfKzMxqQKE+gg8BJwG70r6f4HWSl8rMzKzKFeojmAnMlHR4RFRnL4iZmXWpUNPQt9IFaSZKmpB/PCK+WrLIzMysLAo1DT2T/tlc6kDMzKwyCjUN3Zv+uXleIUnbATtGxPoSx2ZmZmWQafiopF9I2lnSB4GnSaaj/mZpQzMzs3LI+h7ByLQG8Cngt0ADcHapgjIzs/LJmgj6SOpDkghmRcS7pAvZd0XSWEnPSlom6ZJOynxGUoukpZJ+kTlyMzMriqyzj94ErACeBB6RtA/QZR9BuojNDcAngVZgoaRZEdGSU2YE8B3gYxHxqqQ9u/8IZma2LTLVCCLi+ogYGBEnROK/gKMLnHYYsCwilkfEO8B0YFxemfOBGyLi1fQ+f+1m/GZmto2ydhbvIuk6Sc3pz7UkU1N3ZSCwMme7Nd2Xaz9gP0l/kPSYpLGd3H9y273XrFmTJWQzM8soax/BVJJpJT6T/qwHbi3C/XsDI4AxwATgp5J2zS8UEVMioikimgYMGFCE25qZWZusfQTDIuK0nO0rJC0ucM4qYHDO9qB0X65WYEHa+fyipOdIEsPCjHGZmdk2ylojeFPSkW0bkj4GvFngnIXACEkNkj4AnAnMyitzD0ltAEn9SZqKlmeMyczMiiBrjeAC4GeSdkm3XwU+19UJEbFR0oXAHKAXMDUilkq6EmiOiFnpseMktQCbgG9GxNqteRAzM9s6BROBpEZgOMlv9KsAsk4vERGzgdl5+y7N+RzA19MfMzOrgC6bhiRdCtwNnAb8BhjvOYbMzGpLoRrBeKAxIjZI2gP4HfDT0odlZmblUigRvB0RGwAiYm0686iZmXVgwYvrABh/0/vreI1rHMjE0UMqFVImhRLBvpLaRvoIGJazTUScUrLIzMyqXMvqpCW92hNB/pQQPypVIGZm1e7I4f0BuOO80UD7mkFPVmhhmnnlCsTMrNq1JYBqU2jU0L2STk6noM4/tq+kKyV9oXThmZlZqRVqGjqfZIz//5G0DlgD9AWGAi8AP4mImSWN0MzMSqpQ09D/A74FfEvSUGAvkqklnmsbTWRmZtUt6xQTRMQKksVpzMyshvi9ADOzOudEYGZW55wIzMzqXKY+gnT9gcuBfdJzRDJ56L6lC83MzMoha2fxLcDFwCKSdQPMzKxGZE0Er0XEb0saiZmZVUTWRDBX0jXAr4C323ZGxBMlicrMzMomayJom0CjKWdfAMd0dZKkscC/kixVeXNEXJ13fBJwDe8vav+TiLg5Y0xmZlYEmRJBRBzd3QtL6gXcAHwSaAUWSpoVES15Re+KiAu7e30zMyuOTMNH
Je0i6TpJzenPtTkL2XfmMGBZRCyPiHeA6Ww5rbWZmVVY1vcIpgKvA59Jf9YDtxY4ZyCwMme7Nd2X7zRJSyTNkDS4owtJmtyWhNasWZMxZDMzyyJrIhgWEZelv90vj4grgGK8Q3AvMDQiDgTuB27vqFBETImIpohoGjBgQBFua2ZmbbImgjclHdm2kb5g9maBc1YBub/hD+L9TmEgWQc5ItpGId0MHJIxHjMzK5Kso4a+BNye9gsIWAdMKnDOQmCEpAaSBHAmMDG3gKS9ImJ1unkK8EzGeMzMrEiyjhpaDBwkaed0e32GczZKuhCYQzJ8dGpELJV0JdAcEbOAr0o6BdhItuRiZmZF1mUikHRWRNwh6et5+wGIiOu6Oj8iZgOz8/ZdmvP5O8B3uhmzmZkVUaEawQfTP3cqdSBmZlYZhZaqvCn984ryhGNmZuWW9YWyH0raWVIfSQ9KWiPprFIHZ2ZmpZd1+OhxaQfxSSTrFg8HvlmqoMzMrHyyJoK2JqQTgf+IiNdKFI+ZmZVZ1vcI7pP0Z5KXyL4kaQDwVunCMjOzcslUI4iIS4AjgKaIeBf4bzyBnJlZTSj0HsExEfGQpFNz9uUW+VWpAjMzs/Io1DT0ceAh4OQOjgVOBGZmVa/QewSXpX9+vjzhmJlZuWV9j+B/S9o1Z3s3Sd8rWVRmZlY2WYePHh8Rf2vbiIhXgRNKEpGZmZVV1kTQS9L2bRuS+gHbd1HezMyqRNb3CO4EHpTUtjzl5+lkNTEzM6suWdcj+IGkJ4Fj011XRcSc0oVlZmblkrVGAMnqYRsj4gFJO0jaKSJeL1VgZmbVbsGL6wAYf9Oj7faPaxzIxNFDKhFSh7KOGjofmAHclO4aCNxTopjMzGpWy+r1zFy8qnDBMspaI/gKcBiwACAinpe0Z6GTJI0F/pVkqcqbI+LqTsqdRpJoDo2I5owxmZn1aEcO7w/AHeeN3rwvv3bQE2RNBG9HxDtt00tI6k3yZnGnJPUCbgA+CbQCCyXNioiWvHI7AV8jTTJmZrUiNwH0ZFmHj86T9L+AfpI+CfwHcG+Bcw4DlkXE8oh4B5hOxxPVXQX8AM9mamZWEVkTwbeBNcBTwBdJFqT/5wLnDARW5my3pvs2k3QwMDgiftPVhSRNltQsqXnNmjUZQzYzsywKNg2lTTxLI+LDwE+LdWNJ2wHXAZMKlY2IKcAUgKampi6bpMzMrHsK1ggiYhPwrKTujnVaBQzO2R6U7muzEzAKeFjSCuCjwCxJTd28j5mZbYOsncW7AUslPU6yKA0AEXFKF+csBEZIaiBJAGcCE3POfQ3o37Yt6WHgGx41ZGZWXlkTwXe7e+GI2CjpQmAOyfDRqRGxVNKVQHNEzOruNc3MrPgKrVDWF7gAGE7SUXxLRGzMevGImE3SsZy779JOyo7Jel0zMyueQn0EtwNNJEngeODakkdkZmZlVahpaGREHAAg6Rbg8dKHZGZW21pWr2/3hnGl5x4qlAjebfuQtvmXOBwzs9o2rrHd61S0rF4P0KMTwUGS1qefRfJm8fr0c0TEziWNzsysxkwcPaTdl35PmHuo0OL1vcoViJmZVUbWKSbMzKxGORGYmdU5JwIzszrnRGBmVuecCMzM6pwTgZlZnXMiMDOrc04EZmZ1zonAzKzOORGYmdU5JwIzszrnRGBmVudKmggkjZX0rKRlki7p4PgFkp6StFjS7yWNLGU8Zma2paxrFnebpF7ADcAngVZgoaRZEdGSU+wXEXFjWv4U4DpgbKliMjPraRa8uA7Ycjrqci5WU8oawWHAsohYHhHvANOBcbkFImJ9zuYHgShhPGZmVaFl9XpmLl5VtvuVrEYADARW5my3AqPzC0n6CvB14APAMR1dSNJkYDLAkCGVW8XHzKzYjhzeH4A7znv/67Hci9WUMhFkEhE3ADdImgj8M/C5DspMAaYANDU1udZgZjUjNwFUSikTwSpgcM72oHRfZ6YD/741N3r33XdpbW3lrbfe
2prTrYfp27cvgwYNok+fPpUOxawulDIRLARGSGogSQBnAhNzC0gaERHPp5snAs+zFVpbW9lpp50YOnQokrYlZquwiGDt2rW0trbS0NBQ6XDM6kLJEkFEbJR0ITAH6AVMjYilkq4EmiNiFnChpGOBd4FX6aBZKIu33nrLSaBGSGKPPfZgzZo1lQ7FrG6UtI8gImYDs/P2XZrz+WvFupeTQO3wv6VZefnNYjOzOudEUCTf//732X///TnwwANpbGxkwYIFXHHFFXznO99pV27x4sV85CMfAeCNN97gi1/8IsOGDeOQQw5hzJgxLFiwYItrRwTHHHMM69e//9rFPffcgyT+/Oc/b963YsUK+vXrR2NjIyNHjuSCCy7gvffe26bnevvttxk/fjzDhw9n9OjRrFixosNyP/7xj9l///0ZNWoUEyZM2Nxxf9RRR9HY2EhjYyN77703n/rUpwC47777uPTSSzu8lpmVlxNBETz66KPcd999PPHEEyxZsoQHHniAwYMHM2HCBO666652ZadPn86ECRMAOO+889h99915/vnnWbRoEbfeeiuvvPLKFtefPXs2Bx10EDvvvPPmfdOmTePII49k2rRp7coOGzaMxYsXs2TJElpaWrjnnnu26dluueUWdtttN5YtW8bFF1/Mt7/97S3KrFq1iuuvv57m5maefvppNm3axPTp0wGYP38+ixcvZvHixRx++OGceuqpAJx44once++9bNiwYZviM7NtV/H3CIrtinuX0vLy+sIFu2Hk3jtz2cn7d3p89erV9O/fn+233x6A/v37bz622267sWDBAkaPTsYK33333cyZM4cXXniBBQsWcOedd7Lddkk+bmho6HCkzJ133snkyZM3b7/xxhv8/ve/Z+7cuZx88slcccUVW5zTu3dvjjjiCJYtW7Z1D52aOXMml19+OQCnn346F154IRGxRTv+xo0befPNN+nTpw8bNmxg7733bnd8/fr1PPTQQ9x6661A0g8wZswY7rvvPj7zmc9sU4xmtm1cIyiC4447jpUrV7Lffvvx5S9/mXnz5m0+NmHChM2/HT/22GPsvvvujBgxgqVLl9LY2EivXr0KXv8Pf/gDhxxyyObtmTNnMnbsWPbbbz/22GMPFi1atMU5GzZs4MEHH+SAAw7Y4lhuc03uzwMPPLBF2VWrVjF4cPI6SO/evdlll11Yu3ZtuzIDBw7kG9/4BkOGDGGvvfZil1124bjjjmtX5p577uETn/hEu1pNU1MT8+fPL/j8ZlZaNVcj6Oo391LZcccdWbRoEfPnz2fu3LmMHz+eq6++mkmTJjF+/HiOOOIIrr322nbNQt2xbt06dtppp83b06ZN42tfSwZcnXnmmUybNm1zonjhhRdobGxEEuPGjeP444/f4nrF/vJ99dVXmTlzJi+++CK77rorZ5xxBnfccQdnnXVWu5jPO++8duftueeevPzyy0WNxcy6r+YSQaX06tWLMWPGMGbMGA444ABuv/12Jk2axODBg2loaGDevHn88pe/5NFHkzlE9t9/f5588kk2bdpUsFbQu3dv3nvvPbbbbjvWrVvHQw89xFNPPYUkNm3ahCSuueYa4P0+gq4cddRRvP7661vs/9GPfsSxxx7bbt/AgQNZuXIlgwYNYuPGjbz22mvsscce7co88MADNDQ0MGDAAABOPfVU/vjHP25OBK+88gqPP/44v/71r9ud99Zbb9GvX78uYzWz0nMiKIJnn32W7bbbjhEjRgDJyKB99tln8/EJEyZw8cUXs++++zJo0CAg+cJuamrisssu46qrrkISK1asYOnSpZx44ontrv+hD32I5cuXM3z4cGbMmMHZZ5/NTTfdtPn4xz/+cebPn595Qr7u1AhOOeUUbr/9dg4//HBmzJjBMcccs0X/wJAhQ3jsscfYsGED/fr148EHH6SpqWnz8RkzZnDSSSfRt2/fduc999xzjBo1KnMsZvWis6mpC/VXbi33ERTBG2+8wec+
9zlGjhzJgQceSEtLy+YOVoAzzjiDpUuXbtEsdPPNN/OXv/yF4cOHM2rUKCZNmsSee+65xfVPPPFEHn74YSBpYvn0pz/d7vhpp522xeihYjn33HNZu3Ytw4cP57rrruPqq68G4OWXX+aEE04AYPTo0Zx++ukcfPDBHHDAAbz33nvtOrc7axKbO3fuFknPzMpPEdU1mWdTU1M0Nze32/fMM89sHptfi1avXs0555zD/fffX+lQiuYvf/kLEydO5MEHH+zweK3/m5p15aybk/eJijkzqaRFEdHU0TE3DVWBvfbai/PPP5/169e3G3VTzV566SWuvfbaSodh1iOVe2pqJ4IqUWtj7Q899NBKh2BmqZrpI6i2Ji7rnP8tzcqrJhJB3759Wbt2rb9AakDbegT5I4zMrHRqomlo0KBBtLa2eg77GtG2QpmZlUdNJII+ffp4NSszs61UE01DZma29ZwIzMzqnBOBmVmdq7o3iyWtAf5rK0/vD2y58ktt8zPXBz9zfdiWZ94nIgZ0dKDqEsG2kNTc2SvWtcrPXB/8zPWhVM/spiEzszrnRGBmVufqLRFMqXQAFeBnrg9+5vpQkmeuqz4CMzPbUr3VCMzMLI8TgZlZnavJRCBprKRnJS2TdEkHx7eXdFd6fIGkoRUIs6gyPPPXJbVIWiLpQUn7dHSdalLomXPKnSYpJFX9UMMszyzpM+m/9VJJvyh3jMWW4b/tIZLmSvpT+t/3CZWIs1gkTZX0V0lPd3Jckq5P/z6WSDp4m28aETX1A/QCXgD2BT4APAmMzCvzZeDG9POZwF2VjrsMz3w0sEP6+Uv18MxpuZ2AR4DHgKZKx12Gf+cRwJ+A3dLtPSsddxmeeQrwpfTzSGBFpePexmf+H8DBwNOdHD8B+C0g4KPAgm29Zy3WCA4DlkXE8oh4B5gOjMsrMw64Pf08A/iEJJUxxmIr+MwRMTciNqSbjwHVPs9zln9ngKuAHwBvlTO4EsnyzOcDN0TEqwAR8dcyx1hsWZ45gLY1XHcBXi5jfEUXEY8A67ooMg74WSQeA3aVtNe23LMWE8FAYGXOdmu6r8MyEbEReA3YoyzRlUaWZ851LslvFNWs4DOnVebBEfGbcgZWQln+nfcD9pP0B0mPSRpbtuhKI8szXw6cJakVmA1cVJ7QKqa7/78XVBPrEVh2ks4CmoCPVzqWUpK0HXAdMKnCoZRbb5LmoTEktb5HJB0QEX+rZFAlNgG4LSKulXQ48HNJoyLivUoHVi1qsUawChicsz0o3ddhGUm9SaqTa8sSXWlkeWYkHQv8E3BKRLxdpthKpdAz7wSMAh6WtIKkLXVWlXcYZ/l3bgVmRcS7EfEi8BxJYqhWWZ75XOBugIh4FOhLMjlbrcr0/3t31GIiWAiMkNQg6QMkncGz8srMAj6Xfj4deCjSXpgqVfCZJf09cBNJEqj2dmMo8MwR8VpE9I+IoRExlKRf5JSIaK5MuEWR5b/te0hqA0jqT9JUtLyMMRZblmd+CfgEgKSPkCSCWl63dhZwTjp66KPAaxGxelsuWHNNQxGxUdKFwBySEQdTI2KppCuB5oiYBdxCUn1cRtIpc2blIt52GZ/5GmBH4D/SfvGXIuKUigW9jTI+c03J+MxzgOMktQCbgG9GRNXWdjM+8z8CP5V0MUnH8aRq/sVO0jSSZN4/7fe4DOgDEBE3kvSDnAAsAzYAn9/me1bx35eZmRVBLTYNmZlZNzgRmJnVOScCM7M650RgZlbnnAjMzOqcE4GVnKRNkhZLelrSvZJ2LfL1V6Rj5pH0Ridl+kmaJ6mXpKGS3kxjapF0Y/omcnfu2STp+vTzGElH5By7QNI52/JM6XUul/SNAmVuk3R6N645tLNZLfPKfV/Syvy/T0kXSvpC1vtZdXAisHJ4MyIaI2IUyXsbX6lADF8AfhURm9LtFyKiETiQZMbKT3XnYhHRHBFfTTfHAEfkHLsxIn62rQFX2L0kE77lm0rtz+VTd5wIrNwe
JZ0gS9IwSb+TtEjSfEkfTvf/naRfS3oy/Tki3X9PWnappMndvO9ngZn5O9NJB/8IDE9/W35I76/ZMCS97xlpbeZJSY+k+8ZIuk/JWhYXABenNYyj2n6Tl/RhSY+33Su9/lPp50PSGsoiSXNUYPZISedLWpjG8EtJO+QcPlZSs6TnJJ2Ulu8l6Zr0nCWSvtidv6yIeKyjt1XTGWxXSOooSViVciKwspHUi2QqgLa3fqcAF0XEIcA3gH9L918PzIuIg0jmZV+a7v9CWrYJ+KqkTDPGplMT7BsRKzo4tkMa01PA/wVuj4gDgTvTOAAuBf4hjafd29jpNW8EfpzWeubnHPsz8AFJDemu8cBdkvqk9zo9fZ6pwPcLPMavIuLQNIZnSObXaTOU5Lf3E4EbJfVNj78WEYcChwLn58TR9ux7S5pd4L4daQaO2orzrIequSkmrEfqJ2kxSU3gGeB+STuSNKe0TXkBsH365zHAOQBpU85r6f6vSvp0+nkwyWRqWaZP6A/8LW/fsDSmAGZGxG8l/Rw4NT3+c+CH6ec/ALdJuhv4VYb75bqbJAFcnf45HvgQyYR496fP3gsoNFfMKEnfA3YlmSpkTu490pk2n5e0HPgwcBxwYE7/wS4kf1/PtZ0UES+TTFXQXX9N72E1wonAyuHNiGhMf/ueQ9JHcBvwt7SdviBJY4BjgcMjYoOkh0kmF8t0/w7KvpD13hFxgaTRJL9xL5J0SMb7AtxFkux+lVwqnpd0ALA0Ig7vxnVuAz4VEU9KmkQ6sVxbiPkhk6xedVFE5CYMVJxlWfuS/J1ajXDTkJVN2r78VZJJwjYAL0o6Azavw3pQWvRBkuU029q6dyH5jfbVNAl8mGRa6az3fRXolTaZdOWPvD8B4WeB+WkMwyJiQURcSjKr5eC8814nmfa6o3u/QDL523dJkgLAs8AAJXPnI6mPpP0LxLYTsDptVvps3rEzJG0naRjJko7PkiTcL6XlkbSfpA8WuEdW+wEFRx5Z9XAisLKKiD8BS0gWE/kscK6kJ0n6AdqWIPwacHTasbqIZFTP74Dekp4haWZ5rJu3/k/gyAJlLgI+L2kJcHYaB8A1kp5Kh13+kWTd3Fz3Ap9u6yzu4Lp3AWfx/pz575BMf/6D9NkXkzPqqBPfBRaQNFP9Oe/YS8DjJKvOXRARbwE3Ay3AE2ncN5HXAtBVH4GkHyqZ+XIHSa2SLs85/DHg/gLxWhXx7KNWF5QsW3lxRJxd6ViqmZJ1Lb7uv8fa4hqB1YWIeAKYm45csq3Xn6R2YjXENQIzszrnGoGZWZ1zIjAzq3NOBGZmdc6JwMyszjkRmJnVuf8Pd6NG4a3GBcsAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "PrecisionRecallDisplay.from_predictions(\n", " y_true=test[\"label_num\"],\n", " y_pred=score,\n", " name=\"SVC\",\n", ")" ] }, { "cell_type": "markdown", "id": "8b54610a", "metadata": {}, "source": [ "## 不均衡データに対応する\n", "\n", "`class_weight` パラメータで不均衡データに対応できます。" ] }, { "cell_type": "code", "execution_count": 8, "id": "b0ed478e", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
Pipeline(steps=[('vect',\n",
       "                 TfidfVectorizer(tokenizer=<method 'split' of 'str' objects>)),\n",
       "                ('clf', LinearSVC(class_weight='balanced'))])
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" ], "text/plain": [ "Pipeline(steps=[('vect',\n", " TfidfVectorizer(tokenizer=)),\n", " ('clf', LinearSVC(class_weight='balanced'))])" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Same pipeline as the baseline, but with class_weight=\"balanced\" on the classifier.\n", "# token_pattern=None: the custom tokenizer replaces the regex, so the unused default pattern is dropped (avoids the unused-parameter warning).\n", "# random_state=0: LinearSVC shuffles the data in its dual solver; seeding keeps reruns reproducible.\n", "pipe_weight = Pipeline([\n", "    (\"vect\", TfidfVectorizer(tokenizer=str.split, token_pattern=None)),\n", "    (\"clf\", LinearSVC(class_weight=\"balanced\", random_state=0)),\n", "])\n", "\n", "pipe_weight.fit(train[\"tokens\"], train[\"label_num\"])" ] }, { "cell_type": "code", "execution_count": 9, "id": "0a600cfd", "metadata": {}, "outputs": [], "source": [ "score_weight = pipe_weight.decision_function(test[\"tokens\"])" ] }, { "cell_type": "markdown", "id": "1d160f50", "metadata": {}, "source": [ "class_weightオプションを付けないモデルと比較します。" ] }, { "cell_type": "code", "execution_count": 10, "id": "9935b0d8", "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "import matplotlib.pyplot as plt\n", "\n", "# Draw both precision-recall curves on one shared axes for direct comparison.\n", "_, ax = plt.subplots()\n", "# The loop variable is y_score rather than pred, so the class predictions stored in pred by the confusion-matrix cell are not overwritten.\n", "for name, y_score in [\n", "    (\"SVC\", score),\n", "    (\"SVC+balanced\", score_weight),\n", "]:\n", "    PrecisionRecallDisplay.from_predictions(ax=ax, y_true=test[\"label_num\"], y_pred=y_score, name=name)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.6" } }, "nbformat": 4, "nbformat_minor": 5 }