python - sklearn - Prueba de clasificación en Scikit-learn, ValueError: setting an array element with a sequence (asignación de una secuencia a un elemento de un array)
sklearn precision (1)
Prueba esto:
# Build the HOG feature matrix by filling a pre-allocated 2-D float array,
# one row per image, instead of collecting per-image arrays in a list.
# 13 of the images are with vehicles, 4 are without
labels = [1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0]
imgs = []
# NOTE(review): 256 is taken to be the HOG descriptor length for these
# images -- confirm. The row assignment below raises if `fd` has any other
# length, which surfaces size mismatches immediately.
tmp_hogs = np.zeros((len(labels), 256))
for i, filename in enumerate(out):
    filepath = "C:/PATH_TO_SAMPLE_IMAGES//" + filename
    curr_img = color.rgb2gray(io.imread(filepath))
    # The resized copy is only stored in `imgs`; HOG is computed on the
    # original-size grayscale image.
    imgs.append(resize(curr_img, (60, 40)))
    fd, hog_image = hog(curr_img, orientations=8, pixels_per_cell=(16, 16),
                        cells_per_block=(1, 1), visualise=True)
    tmp_hogs[i, :] = fd
img_hogs = tmp_hogs
Usando el tutorial sobre AdaBoost multiclase, estoy tratando de clasificar algunas imágenes que tienen dos clases (no veo por qué el algoritmo no habría de funcionar si el problema es binario). Más adelante voy a ampliar mis muestras para incluir otras clases.
Mi prueba actual es bastante pequeña, solo 17 imágenes en total, 10 para entrenamiento, 7 para prueba.
Por ahora tengo dos clases: 0: no vehicle, 1: vehicle present
Utilicé etiquetas enteras porque, según el ejemplo del enlace anterior, los datos de entrenamiento consisten en etiquetas basadas en enteros.
He editado el ejemplo proporcionado solo un poco, para incluir mis propios archivos de imagen, pero me aparece un error.
Traceback (most recent call last):
File "C:/Users/app/Documents/Python Scripts/carclassify.py", line 66, in <module>
bdt_discrete.fit(X_train, y_train)
File "C:/Users/app/Anaconda/lib/site-packages/sklearn/ensemble/weight_boosting.py", line 389, in fit
return super(AdaBoostClassifier, self).fit(X, y, sample_weight)
File "C:/Users/app/Anaconda/lib/site-packages/sklearn/ensemble/weight_boosting.py", line 99, in fit
X = np.ascontiguousarray(array2d(X), dtype=DTYPE)
File "C:/Users/app/Anaconda/lib/site-packages/numpy/core/numeric.py", line 408, in ascontiguousarray
return array(a, dtype, copy=False, order='C', ndmin=1)
ValueError: setting an array element with a sequence.
El siguiente es mi código, adaptado del ejemplo en el sitio web scikit-learn:
import numpy as np

# Read the list of sample image file names, one name per line. The context
# manager closes the file as soon as the names have been read.
with open("PATH_TO_SAMPLES//samples.txt", 'r') as f:
    out = f.read().splitlines()

imgs = []
tmp_hogs = []
# 13 of the images are with vehicles, 4 are without
labels = [1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0]
for filename in out:
    filepath = "C:/PATH_TO_SAMPLE_IMAGES//" + filename
    curr_img = color.rgb2gray(io.imread(filepath))
    imgs.append(resize(curr_img, (60, 40)))
    # NOTE(review): HOG is computed on `curr_img` (original size), not on the
    # resized copy appended to `imgs` above.
    fd, hog_image = hog(curr_img, orientations=8, pixels_per_cell=(16, 16),
                        cells_per_block=(1, 1), visualise=True)
    tmp_hogs.append(fd)
# NOTE(review): if the descriptors in `tmp_hogs` differ in length, np.array()
# cannot build a 2-D float matrix from them. Presumably this is what makes
# the later fit() call fail with "setting an array element with a sequence"
# -- confirm by checking the length of each `fd`.
img_hogs = np.array(tmp_hogs)
# Split features and labels into the first `n_split` samples (training) and
# the remainder (test), then fit both AdaBoost variants.
n_split = 10
# The test slice previously read from an undefined name `X`; both slices must
# come from the same feature matrix, `img_hogs`.
X_train, X_test = img_hogs[:n_split], img_hogs[n_split:] # all first ten images with vehicles
y_train, y_test = labels[:n_split], labels[n_split:] # 3 images with vehicles, 4 without
# NOTE(review): with the label list used here, the first ten labels are all 1,
# so the training set contains a single class -- consider shuffling or
# stratifying the split before drawing conclusions from the test error.

#now all the code below is straight off the example on scikit-learn's website
# No `algorithm` argument here, so the library default is used.
bdt_real = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=2),
    n_estimators=600,
    learning_rate=1)
# Same base estimator, explicitly using the discrete SAMME algorithm.
bdt_discrete = AdaBoostClassifier(
    DecisionTreeClassifier(max_depth=2),
    n_estimators=600,
    learning_rate=1.5,
    algorithm="SAMME")
bdt_real.fit(X_train, y_train)
bdt_discrete.fit(X_train, y_train)
# Collect the test error (1 - accuracy) of each classifier after every
# boosting stage. zip() stops at the shorter of the two staged sequences.
real_test_errors = []
discrete_test_errors = []
# Despite its name, `discrete_train_predict` holds predictions on X_test.
for real_test_predict, discrete_train_predict in zip(
        bdt_real.staged_predict(X_test), bdt_discrete.staged_predict(X_test)):
    real_test_errors.append(
        1. - accuracy_score(real_test_predict, y_test))
    discrete_test_errors.append(
        1. - accuracy_score(discrete_train_predict, y_test))
# Plot three side-by-side panels: test error per boosting stage, per-estimator
# error, and per-estimator weight (SAMME only).
# range() works on both Python 2 and Python 3, unlike xrange().
# NOTE(review): the same x-axis is used for every curve; if boosting stops
# early or the two models end up with different numbers of estimators, the
# x and y lengths may not match -- confirm before relying on these plots.
n_trees = range(1, len(bdt_discrete) + 1)

pl.figure(figsize=(15, 5))

# Panel 1: test error as a function of the number of trees.
pl.subplot(131)
pl.plot(n_trees, discrete_test_errors, c='black', label='SAMME')
pl.plot(n_trees, real_test_errors, c='black',
        linestyle='dashed', label='SAMME.R')
pl.legend()
pl.ylim(0.18, 0.62)
pl.ylabel('Test Error')
pl.xlabel('Number of Trees')

# Panel 2: error of each individual estimator.
pl.subplot(132)
pl.plot(n_trees, bdt_discrete.estimator_errors_, "b", label='SAMME', alpha=.5)
pl.plot(n_trees, bdt_real.estimator_errors_, "r", label='SAMME.R', alpha=.5)
pl.legend()
pl.ylabel('Error')
pl.xlabel('Number of Trees')
pl.ylim((.2,
         max(bdt_real.estimator_errors_.max(),
             bdt_discrete.estimator_errors_.max()) * 1.2))
pl.xlim((-20, len(bdt_discrete) + 20))

# Panel 3: weight assigned to each estimator by the SAMME model.
pl.subplot(133)
pl.plot(n_trees, bdt_discrete.estimator_weights_, "b", label='SAMME')
pl.legend()
pl.ylabel('Weight')
pl.xlabel('Number of Trees')
pl.ylim((0, bdt_discrete.estimator_weights_.max() * 1.2))
pl.xlim((-20, len(bdt_discrete) + 20))

# prevent overlapping y-axis labels
pl.subplots_adjust(wspace=0.25)
pl.show()
Editar
escribí
# Parenthesised form is valid on both Python 2 and Python 3.
print(tmp_hogs)
y el resultado fue este:
[ array([ 0.27621208, 0.11038658, 0.10698133, ..., 0.08661556, 0.04612063, 0.0280782 ]),
array([ 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ..., -1.29909838e-15, -7.01780982e-17, -1.24900943e-15]),
array([ 0.0503603 , 0.1497235 , 0.2372957 , ..., 0.07249325, 0.04545541, 0.00903818]),
array([ 0.27299191, 0.13122109, 0.0719268 , ..., 0.0848522 , 0.04789403, 0.01387038]),
array([ 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ..., 3.32140617e-17, -6.58924128e-17, -6.23567224e-16]),
array([ 0.37431874, 0.18094303, 0.01219871, ..., 0.06501856, 0.04855516, 0.02439321]),
array([ 0.41087302, 0.16478851, 0.03396399, ..., 0.09511273, 0.04077713, 0.03945513]),
array([ 0.17753915, 0.07025565, 0.09136909, ..., 0.03396507, 0.01379266, 0.01645722]),
array([ 0.40605587, 0.05915388, 0.03767763, ..., 0.08981079, 0.05452031, 0.01725399]),
array([ 0. , 0. , 0. , ..., 0.00579303, 0.02053979, 0.0019091 ]),
array([ 0.31550735, 0.11988131, 0.07716529, ..., 0.09815158, 0.03058497, 0.02236517]),
array([ 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ..., -3.51175682e-16, 1.31619418e-03, 2.86127901e-16]),
array([ 0.21381704, 0.22352378, 0.11568828, ..., 0.06311083, 0.02696666, 0.00402261]),
array([ 0.17480064, 0.1469145 , 0.16336016, ..., 0.05614001, 0.03244093, 0.00524034]),
array([ 0. , 0. , 0. , ..., 0.03089959, 0.00509584, 0.00247698]),
array([ 0.04711166, 0.0218663 , 0.05316 , ..., 0.04214594, 0.04892439, 0.25840958]),
array([ 0.05357464, 0.00530857, 0.07162301, ..., 0.06802692, 0.08331959, 0.26619977])]
Entonces corrí
# Parenthesised form is valid on both Python 2 and Python 3.
print(img_hogs)
y la salida fue:
[ array([ 0.27621208, 0.11038658, 0.10698133, ..., 0.08661556, 0.04612063, 0.0280782 ])
array([ 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ..., -1.29909838e-15, -7.01780982e-17, -1.24900943e-15])
array([ 0.0503603 , 0.1497235 , 0.2372957 , ..., 0.07249325, 0.04545541, 0.00903818])
array([ 0.27299191, 0.13122109, 0.0719268 , ..., 0.0848522 , 0.04789403, 0.01387038])
array([ 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ..., 3.32140617e-17, -6.58924128e-17, -6.23567224e-16])
array([ 0.37431874, 0.18094303, 0.01219871, ..., 0.06501856, 0.04855516, 0.02439321])
array([ 0.41087302, 0.16478851, 0.03396399, ..., 0.09511273, 0.04077713, 0.03945513])
array([ 0.17753915, 0.07025565, 0.09136909, ..., 0.03396507, 0.01379266, 0.01645722])
array([ 0.40605587, 0.05915388, 0.03767763, ..., 0.08981079, 0.05452031, 0.01725399])
array([ 0. , 0. , 0. , ..., 0.00579303, 0.02053979, 0.0019091 ])
array([ 0.31550735, 0.11988131, 0.07716529, ..., 0.09815158, 0.03058497, 0.02236517])
array([ 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ..., -3.51175682e-16, 1.31619418e-03, 2.86127901e-16])
array([ 0.21381704, 0.22352378, 0.11568828, ..., 0.06311083, 0.02696666, 0.00402261])
array([ 0.17480064, 0.1469145 , 0.16336016, ..., 0.05614001, 0.03244093, 0.00524034])
array([ 0. , 0. , 0. , ..., 0.03089959, 0.00509584, 0.00247698])
array([ 0.04711166, 0.0218663 , 0.05316 , ..., 0.04214594, 0.04892439, 0.25840958])
array([ 0.05357464, 0.00530857, 0.07162301, ..., 0.06802692, 0.08331959, 0.26619977])]