
今日の作業記録 python error(言語処理100本ノック:97)未解決

Last updated at Posted at 2019-01-24
Traceback (most recent call last):
  File "./p97.py", line 19, in <module>
    predicts = KMeans(n_clusters = 5).fit_predict(matrix_x300)
  File "/opt/conda/lib/python3.7/site-packages/sklearn/cluster/k_means_.py", line 997, in fit_predict
    return self.fit(X, sample_weight=sample_weight).labels_
  File "/opt/conda/lib/python3.7/site-packages/sklearn/cluster/k_means_.py", line 971, in fit
  File "/opt/conda/lib/python3.7/site-packages/sklearn/cluster/k_means_.py", line 311, in k_means
    order=order, copy=copy_x)
  File "/opt/conda/lib/python3.7/site-packages/sklearn/utils/validation.py", line 582, in check_array
ValueError: Found array with 0 sample(s) (shape=(0, 0)) while a minimum of 1 is required.
# cat p97.py
#!/usr/bin/env python
# coding: utf-8

import pickle
from collections import OrderedDict
from scipy import io
import numpy as np
from sklearn.cluster import KMeans

fname_dict_index_t = 'dict_index_country'
fname_matrix_x300 = 'matrix_x300_country'

with open(fname_dict_index_t, 'rb') as data_file:
    dict_index_t = pickle.load(data_file)

matrix_x300 = io.loadmat(fname_matrix_x300)['matrix_x300']

predicts = KMeans(n_clusters = 5).fit_predict(matrix_x300)

result = zip(dict_index_t.keys(), predicts)

for country, category in sorted(result, key = lambda x: x[1]):
    print('{}\t{}'.format(category, country))
# ./p98.py
/opt/conda/lib/python3.7/site-packages/scipy/cluster/hierarchy.py:482: ClusterWarning: scipy.cluster: The symmetric non-negative hollow observation matrix looks suspiciously like an uncondensed distance matrix
  return linkage(y, method='ward', metric='euclidean')
Traceback (most recent call last):
  File "./p98.py", line 21, in <module>
    ward = ward(matrix_x300)
  File "/opt/conda/lib/python3.7/site-packages/scipy/cluster/hierarchy.py", line 482, in ward
    return linkage(y, method='ward', metric='euclidean')
  File "/opt/conda/lib/python3.7/site-packages/scipy/cluster/hierarchy.py", line 716, in linkage
    n = int(distance.num_obs_y(y))
  File "/opt/conda/lib/python3.7/site-packages/scipy/spatial/distance.py", line 2276, in num_obs_y
    raise ValueError("The number of observations cannot be determined on "
ValueError: The number of observations cannot be determined on an empty distance matrix.
#!/usr/bin/env python
# coding: utf-8

import pickle
from collections import OrderedDict
from scipy import io
import numpy as np

from scipy.cluster.hierarchy import ward, dendrogram
from matplotlib import pyplot as plt

fname_dict_index_t = 'dict_index_country'
fname_matrix_x300 = 'matrix_x300_country'

with open(fname_dict_index_t, 'rb') as data_file:
    dict_index_t = pickle.load(data_file)

matrix_x300 = io.loadmat(fname_matrix_x300)['matrix_x300']

ward = ward(matrix_x300)

dendrogram(ward, labels = list(dict_index_t.keys()), leaf_font_size = 8)
Traceback (most recent call last):
  File "./p99.py", line 21, in <module>
    t_sne = TSNE(perplexity = 30, learning_rate = 500).fit_transform(matrix_x300)
  File "/opt/conda/lib/python3.7/site-packages/sklearn/manifold/t_sne.py", line 894, in fit_transform
    embedding = self._fit(X)
  File "/opt/conda/lib/python3.7/site-packages/sklearn/manifold/t_sne.py", line 693, in _fit
    dtype=[np.float32, np.float64])
  File "/opt/conda/lib/python3.7/site-packages/sklearn/utils/validation.py", line 582, in check_array
ValueError: Found array with 0 sample(s) (shape=(0, 0)) while a minimum of 2 is required.
#!/usr/bin/env python
# coding: utf-8
import pickle
from collections import OrderedDict
from scipy import io
import numpy as np

from sklearn.manifold import TSNE
from matplotlib import pyplot as plt
from sklearn.cluster import KMeans

fname_dict_index_t = 'dict_index_country'
fname_matrix_x300 = 'matrix_x300_country'

with open(fname_dict_index_t, 'rb') as data_file:
    dict_index_t = pickle.load(data_file)

matrix_x300 = io.loadmat(fname_matrix_x300)['matrix_x300']

t_sne = TSNE(perplexity = 30, learning_rate = 500).fit_transform(matrix_x300)

predicts = KMeans(n_clusters = 5).fit_predict(matrix_x300)

fig, ax = plt.subplots()
cmap = plt.get_cmap('Set1')
for index, label in enumerate(dict_index_t.keys()):
    cval = cmap(predicts[index] / 4)
    ax.scatter(t_sne[index, 0], t_sne[index, 1], marker = '.', color = cval)
    ax.annotate(label, xy = (t_sne[index, 0], t_sne[index, 1]), color = cval)



