python SVC
Q&A
Closed
腫瘍の計測データから、悪性か陽性か判断する機械学習をさせようとしているのですが
CSVファイルを読み込み、Xとyにそれぞれ分け 悪性を1、陽性を0へ数値化、
testを2割、trainを8割に設定したところまでは良かったのですが、
モデルを作って(SVC)学習しようとするとエラーが出ます。
解決方法をご教授願います、
# breast_cancer_wisconsin_data.csv を取込
import pandas as pd
breast_cancer = pd.read_csv('breast_cancer_wisconsin_data.csv')
# X と y を作成する
X=breast_cancer["radius_mean"]
y=breast_cancer["diagnosis"]
import numpy as np
X = np.array(X)
y = np.array(y)
# カテゴリ値の数値化
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
le.fit(["B", "M"]) # 良性:0, 悪性:1
y = le.transform(y.flatten())
# 訓練データ8割、テストデータ2割に分割
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, train_size = 0.8, shuffle = True)
# SVCの分類モデルを作成
from sklearn.svm import SVC
classifier = SVC(kernel = "linear")
# 訓練データを分類モデルに設定
classifier.fit(X_train, y_train)
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Input In [3], in <module>
26 classifier = SVC(kernel = "linear")
28 # 訓練データを分類モデルに設定
---> 29 classifier.fit(X_train, y_train)
File ~/.pyenv/versions/3.8.3/lib/python3.8/site-packages/sklearn/svm/_base.py:160, in BaseLibSVM.fit(self, X, y, sample_weight)
158 check_consistent_length(X, y)
159 else:
--> 160 X, y = self._validate_data(X, y, dtype=np.float64,
161 order='C', accept_sparse='csr',
162 accept_large_sparse=False)
164 y = self._validate_targets(y)
166 sample_weight = np.asarray([]
167 if sample_weight is None
168 else sample_weight, dtype=np.float64)
File ~/.pyenv/versions/3.8.3/lib/python3.8/site-packages/sklearn/base.py:432, in BaseEstimator._validate_data(self, X, y, reset, validate_separately, **check_params)
430 y = check_array(y, **check_y_params)
431 else:
--> 432 X, y = check_X_y(X, y, **check_params)
433 out = X, y
435 if check_params.get('ensure_2d', True):
File ~/.pyenv/versions/3.8.3/lib/python3.8/site-packages/sklearn/utils/validation.py:73, in _deprecate_positional_args.<locals>.inner_f(*args, **kwargs)
68 warnings.warn("Pass {} as keyword args. From version 0.25 "
69 "passing these as positional arguments will "
70 "result in an error".format(", ".join(args_msg)),
71 FutureWarning)
72 kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 73 return f(**kwargs)
File ~/.pyenv/versions/3.8.3/lib/python3.8/site-packages/sklearn/utils/validation.py:796, in check_X_y(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, estimator)
793 if y is None:
794 raise ValueError("y cannot be None")
--> 796 X = check_array(X, accept_sparse=accept_sparse,
797 accept_large_sparse=accept_large_sparse,
798 dtype=dtype, order=order, copy=copy,
799 force_all_finite=force_all_finite,
800 ensure_2d=ensure_2d, allow_nd=allow_nd,
801 ensure_min_samples=ensure_min_samples,
802 ensure_min_features=ensure_min_features,
803 estimator=estimator)
804 if multi_output:
805 y = check_array(y, accept_sparse='csr', force_all_finite=True,
806 ensure_2d=False, dtype=None)
File ~/.pyenv/versions/3.8.3/lib/python3.8/site-packages/sklearn/utils/validation.py:73, in _deprecate_positional_args.<locals>.inner_f(*args, **kwargs)
68 warnings.warn("Pass {} as keyword args. From version 0.25 "
69 "passing these as positional arguments will "
70 "result in an error".format(", ".join(args_msg)),
71 FutureWarning)
72 kwargs.update({k: arg for k, arg in zip(sig.parameters, args)})
---> 73 return f(**kwargs)
File ~/.pyenv/versions/3.8.3/lib/python3.8/site-packages/sklearn/utils/validation.py:620, in check_array(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator)
618 # If input is 1D raise error
619 if array.ndim == 1:
--> 620 raise ValueError(
621 "Expected 2D array, got 1D array instead:\narray={}.\n"
622 "Reshape your data either using array.reshape(-1, 1) if "
623 "your data has a single feature or array.reshape(1, -1) "
624 "if it contains a single sample.".format(array))
626 # in the future np.flexible dtypes will be handled like object dtypes
627 if dtype_numeric and np.issubdtype(array.dtype, np.flexible):
ValueError: Expected 2D array, got 1D array instead:
array=[15.53 18.94 14.4 15.73 20.94 15.1 15.27 13.4 10.29 11.52
8.219 9.738 13.14 11.61 15.75 14.76 14.92 12.31 15.66 16.11
9.847 14.97 12.22 11.5 13.37 9.731 10.26 8.598 13.27 18.08
11.76 18.22 10.26 9.436 13.05 18.49 9.397 12.72 13.66 9.333
21.09 8.95 #.....とデータの羅列が続きます。
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.
一番下にarray.reshapeを使用して形状を変更とありましたので、それで実行してみてもダメでした。
2D,1Dどうこう書いてあるんですが、X,y共に1Dのはずなのに、とそこも疑問です。
ValueErrorなので X y どちらかの数値?が何か違うのだと思うんですが、分かりません。
よろしくお願いします。