{ "cells": [ { "cell_type": "markdown", "id": "07463a5f", "metadata": {}, "source": [ "# サポートベクトルマシン\n", "\n", "サポートベクトルマシン全般については次を確認してください。\n", "[https://scikit-learn.org/stable/modules/svm.html](https://scikit-learn.org/stable/modules/svm.html)" ] }, { "cell_type": "markdown", "id": "22ae72e6", "metadata": {}, "source": [ "**データとモジュールのロード**" ] }, { "cell_type": "code", "execution_count": 1, "id": "4f42c570", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "from sklearn import model_selection\n", "\n", "data = pd.read_csv(\"input/pn_same_judge_preprocessed.csv\")\n", "train, test = model_selection.train_test_split(data, test_size=0.1, random_state=0)" ] }, { "cell_type": "code", "execution_count": 2, "id": "55fdf55a", "metadata": {}, "outputs": [], "source": [ "from sklearn.pipeline import Pipeline\n", "from sklearn.feature_extraction.text import TfidfVectorizer\n", "from sklearn.metrics import ConfusionMatrixDisplay\n", "from sklearn.metrics import PrecisionRecallDisplay" ] }, { "cell_type": "markdown", "id": "310d666d", "metadata": {}, "source": [ "## SVC" ] }, { "cell_type": "markdown", "id": "ea30df74", "metadata": {}, "source": [ "[sklearn.svm.LinearSVC](https://scikit-learn.org/stable/modules/generated/sklearn.svm.LinearSVC.html)\n", "を使います。" ] }, { "cell_type": "code", "execution_count": 3, "id": "79cc90a0", "metadata": {}, "outputs": [], "source": [ "from sklearn.svm import LinearSVC" ] }, { "cell_type": "code", "execution_count": 4, "id": "bb3dbb21", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
Pipeline(steps=[('vect',\n", " TfidfVectorizer(tokenizer=<method 'split' of 'str' objects>)),\n", " ('clf', LinearSVC())])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
Pipeline(steps=[('vect',\n", " TfidfVectorizer(tokenizer=<method 'split' of 'str' objects>)),\n", " ('clf', LinearSVC())])
TfidfVectorizer(tokenizer=<method 'split' of 'str' objects>)
LinearSVC()
Pipeline(steps=[('vect',\n", " TfidfVectorizer(tokenizer=<method 'split' of 'str' objects>)),\n", " ('clf', LinearSVC(class_weight='balanced'))])In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
Pipeline(steps=[('vect',\n", " TfidfVectorizer(tokenizer=<method 'split' of 'str' objects>)),\n", " ('clf', LinearSVC(class_weight='balanced'))])
TfidfVectorizer(tokenizer=<method 'split' of 'str' objects>)
LinearSVC(class_weight='balanced')