matriz diagrama correlación caja bigote python matplotlib boxplot

diagrama - matriz de correlación python



matplotlib: diagramas de caja grupales (6)

¿Hay alguna forma de agrupar las gráficas de caja en matplotlib?

Supongamos que tenemos tres grupos "A", "B" y "C" y para cada uno queremos crear un diagrama de caja para "manzanas" y "naranjas". Si no es posible agrupar directamente, podemos crear las seis combinaciones y colocarlas linealmente una al lado de la otra. ¿Cuál sería la forma más sencilla de visualizar las agrupaciones? Estoy tratando de evitar poner en las etiquetas de las marcas del eje (ticks) algo como "A + manzanas", ya que mi escenario implica nombres mucho más largos que "A".


¿Qué hay de usar colores para diferenciar entre "manzanas" y "naranjas" y el espaciado para separar "A", "B" y "C"?

Algo como esto:

from pylab import (
    axes,
    boxplot,
    figure,
    legend,
    plot,
    savefig,
    setp,
    show,
    xlim,
    ylim,
)

# NOTE: ``hold`` is intentionally no longer imported: it was removed from
# Matplotlib (artists are always added to the current axes now), so importing
# or calling it fails on current releases.


def setBoxColors(bp):
    """Color a pair of box plots: the first one blue, the second one red.

    :param bp: The dictionary returned by ``boxplot`` for exactly two data
        sets (keys used: 'boxes', 'caps', 'whiskers', 'fliers', 'medians').
    """
    fliers = bp['fliers']
    # Older Matplotlib releases stored two flier artists per box, newer ones
    # store a single artist per box; derive the count instead of hard-coding
    # indices so both layouts work.
    fliers_per_box = max(len(fliers) // 2, 1)

    for index, color in enumerate(('blue', 'red')):
        setp(bp['boxes'][index], color=color)
        setp(bp['medians'][index], color=color)
        # Each box owns two consecutive caps and two consecutive whiskers.
        setp(bp['caps'][2 * index:2 * index + 2], color=color)
        setp(bp['whiskers'][2 * index:2 * index + 2], color=color)
        # Fliers are drawn as markers, so the marker edge must be colored too.
        setp(
            fliers[index * fliers_per_box:(index + 1) * fliers_per_box],
            color=color,
            markeredgecolor=color,
        )


# Some fake data to plot
A = [[1, 2, 5], [7, 2]]
B = [[5, 7, 2, 2, 5], [7, 2, 5]]
C = [[3, 2, 5, 7], [6, 7, 3]]

fig = figure()
ax = axes()

# first boxplot pair
bp = boxplot(A, positions=[1, 2], widths=0.6)
setBoxColors(bp)

# second boxplot pair
bp = boxplot(B, positions=[4, 5], widths=0.6)
setBoxColors(bp)

# third boxplot pair
bp = boxplot(C, positions=[7, 8], widths=0.6)
setBoxColors(bp)

# set axes limits and labels (tick positions first, then their labels, so the
# three labels are attached to the three group centers)
xlim(0, 9)
ylim(0, 9)
ax.set_xticks([1.5, 4.5, 7.5])
ax.set_xticklabels(['A', 'B', 'C'])

# draw temporary red and blue lines and use them to create a legend
hB, = plot([1, 1], 'b-')
hR, = plot([1, 1], 'r-')
legend((hB, hR), ('Apples', 'Oranges'))
hB.set_visible(False)
hR.set_visible(False)

savefig('boxcompare.png')
show()


Aquí está mi versión. Almacena los datos agrupados por categoría.

import matplotlib.pyplot as plt
import numpy as np

# One list of samples per category ('A', 'B', 'C') for each of the two series.
data_a = [[1, 2, 5], [5, 7, 2, 2, 5], [7, 2, 5]]
data_b = [[6, 4, 2], [1, 2, 5, 3, 2], [2, 3, 5, 1]]

ticks = ['A', 'B', 'C']


def set_box_color(bp, color):
    """Apply a single color to the boxes, whiskers, caps and medians.

    :param bp: The dictionary returned by ``plt.boxplot``.
    :param color: Any Matplotlib color specification.
    """
    for element in ('boxes', 'whiskers', 'caps', 'medians'):
        plt.setp(bp[element], color=color)


plt.figure()

# Categories are spaced 2.0 apart; the two series are shifted by -0.4 / +0.4
# around each category center. ``sym=''`` hides the outlier markers.
bpl = plt.boxplot(data_a, positions=np.arange(len(data_a)) * 2.0 - 0.4, sym='', widths=0.6)
bpr = plt.boxplot(data_b, positions=np.arange(len(data_b)) * 2.0 + 0.4, sym='', widths=0.6)
set_box_color(bpl, '#D7191C')  # colors are from http://colorbrewer2.org/
set_box_color(bpr, '#2C7BB6')

# draw temporary red and blue lines and use them to create a legend
plt.plot([], c='#D7191C', label='Apples')
plt.plot([], c='#2C7BB6', label='Oranges')
plt.legend()

plt.xticks(range(0, len(ticks) * 2, 2), ticks)
plt.xlim(-2, len(ticks) * 2)
plt.ylim(0, 8)
plt.tight_layout()
plt.savefig('boxcompare.png')

No tengo reputación, así que no puedo publicar una imagen aquí. Puede ejecutarlo y ver el resultado. Básicamente es muy similar a lo que hizo Molly.

Tenga en cuenta que, dependiendo de la versión de Python que esté utilizando, es posible que necesite reemplazar xrange por range.


Aquí hay una función que escribí que toma el código de Molly y algún otro código que he encontrado en Internet para hacer diagramas de caja agrupados ligeramente más elegantes:

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches


def custom_legend(colors, labels, linestyles=None):
    """Build the handles for a custom legend.

    Creates a list of matplotlib Patch objects that can be passed to the
    legend(...) function (as ``handles``) to create a custom legend.

    :param colors: A list of colors, one for each entry in the legend.
    :param labels: A list of labels, one for each entry in the legend.
    :param linestyles: (Optional) A list of line styles, one for each color.
        When omitted every entry uses 'solid'.
    :return: A list of ``matplotlib.patches.Patch`` objects.
    :raises AssertionError: If ``linestyles`` is given and its length differs
        from the length of ``colors``.
    """
    if linestyles is not None:
        assert len(linestyles) == len(colors), "Length of linestyles must match length of colors."

    return [
        patches.Patch(
            color=color,
            label=label,
            linestyle='solid' if linestyles is None else linestyles[index],
        )
        for index, (color, label) in enumerate(zip(colors, labels))
    ]


def grouped_boxplot(data, group_names=None, subgroup_names=None, ax=None, subgroup_colors=None,
                    box_width=0.6, box_spacing=1.0):
    """Draw a grouped boxplot.

    The data should be organized in a hierarchy, where there are multiple
    subgroups for each main group.

    :param data: A dictionary of length equal to the number of the groups.
        The key should be the group name, the value should be a list of
        arrays. The length of the list should be equal to the number of
        subgroups.
    :param group_names: (Optional) The group names, should be the same as
        data.keys(), but can be ordered.
    :param subgroup_names: (Optional) Names of the subgroups. A legend is
        drawn only when this is given.
    :param ax: (Optional) The axis to plot on. Defaults to the current axis.
    :param subgroup_colors: A list specifying the plot color for each
        subgroup. Random colors are used when omitted.
    :param box_width: Width of each box.
    :param box_spacing: Extra horizontal gap inserted between groups.
    :raises AssertionError: If the groups do not all have the same number of
        subgroups, or ``subgroup_colors`` has the wrong length.
    """
    if group_names is None:
        # Materialize the keys view so it can be used as a tick-label sequence.
        group_names = list(data.keys())

    if ax is None:
        ax = plt.gca()
    plt.sca(ax)

    nsubgroups = np.array([len(v) for v in data.values()])
    assert len(np.unique(nsubgroups)) == 1, "Number of subgroups for each property differ!"
    nsubgroups = nsubgroups[0]

    if subgroup_colors is None:
        subgroup_colors = [np.random.rand(3) for _ in range(nsubgroups)]
    else:
        assert len(subgroup_colors) == nsubgroups, "subgroup_colors length must match number of subgroups (%d)" % nsubgroups

    def _decorate_box(_bp, _d):
        """Fill the boxes of one group and mark each median and mean."""
        plt.setp(_bp['boxes'], lw=0, color='k')
        plt.setp(_bp['whiskers'], lw=3.0, color='k')

        # fill in each box with a color
        assert len(_bp['boxes']) == nsubgroups
        for _k, _box in enumerate(_bp['boxes']):
            # The first five vertices of the box outline form a closed
            # rectangle. Polygon needs a concrete sequence, not a zip iterator.
            _box_coords = list(zip(_box.get_xdata()[:5], _box.get_ydata()[:5]))
            ax.add_patch(plt.Polygon(_box_coords, facecolor=subgroup_colors[_k]))

        for _k, _med in enumerate(_bp['medians']):
            # draw a black line for the median
            plt.plot(_med.get_xdata()[:2], _med.get_ydata()[:2], 'k', linewidth=3.0)
            # draw a black-edged asterisk for the mean
            plt.plot([np.mean(_med.get_xdata())], [np.mean(_d[_k])],
                     color='w', marker='*', markeredgecolor='k', markersize=12)

    cpos = 1  # x position of the first box of the group being drawn
    label_pos = []
    for k in group_names:
        d = data[k]
        pos = np.arange(len(d)) + cpos
        label_pos.append(pos.mean())  # group label sits at the group's center
        bp = plt.boxplot(d, positions=pos, widths=box_width)
        _decorate_box(bp, d)
        cpos += len(d) + box_spacing

    plt.xlim(0, cpos - 1)
    plt.xticks(label_pos, group_names)

    if subgroup_names is not None:
        leg = custom_legend(subgroup_colors, subgroup_names)
        plt.legend(handles=leg)

Puede usar la(s) función(es) de esta manera:

# Example input: three groups, each with two subgroups of 100 random samples.
data = {
    'A': [np.random.randn(100), np.random.randn(100) + 5],
    'B': [np.random.randn(100) + 1, np.random.randn(100) + 9],
    'C': [np.random.randn(100) - 3, np.random.randn(100) - 5],
}

grouped_boxplot(
    data,
    group_names=['A', 'B', 'C'],
    subgroup_names=['Apples', 'Oranges'],
    subgroup_colors=['#D02D2E', '#D67700'],
)
plt.show()


Datos falsos:

import numpy as np
import pandas as pd

# Fake data: one group label per row plus a random value for each fruit.
df = pd.DataFrame({
    'Group': ['A', 'A', 'A', 'B', 'C', 'B', 'B', 'C', 'A', 'C'],
    'Apple': np.random.rand(10),
    'Orange': np.random.rand(10),
})
# Fix the column order explicitly.
df = df[['Group', 'Apple', 'Orange']]

# Example contents of ``df`` (the numeric values are random and will differ
# on every run):
#
#   Group     Apple    Orange
# 0     A  0.465636  0.537723
# 1     A  0.560537  0.727238
# 2     A  0.268154  0.648927
# 3     B  0.722644  0.115550
# 4     C  0.586346  0.042896
# 5     B  0.562881  0.369686
# 6     B  0.395236  0.672477
# 7     C  0.577949  0.358801
# 8     A  0.764069  0.642724
# 9     C  0.731076  0.302369

Puede usar la biblioteca Seaborn para estos gráficos. Primero aplique melt al DataFrame para pasar los datos a formato largo y luego cree el diagrama de caja que prefiera.

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Reshape to long format: one row per (Group, fruit) pair, with the fruit name
# in the 'fruits' column and the measurement in the default 'value' column.
dd = pd.melt(df, id_vars=['Group'], value_vars=['Apple', 'Orange'], var_name='fruits')

# One box per fruit inside each group; ``hue`` selects the color by fruit.
sns.boxplot(x='Group', y='value', data=dd, hue='fruits')


Solo para agregar a la conversación, he encontrado una forma más elegante de cambiar el color de la gráfica de cajas al iterar sobre el diccionario del objeto mismo

import numpy as np
import matplotlib.pyplot as plt


def color_box(bp, color):
    """Color the boxes, caps and whiskers of a box plot.

    :param bp: The dictionary returned by ``plt.boxplot``.
    :param color: Any Matplotlib color specification.
    """
    # Define the elements to color. You can also add medians, fliers and means
    elements = ['boxes', 'caps', 'whiskers']

    # setp accepts a list of artists, so each element group is colored in one
    # call instead of indexing the artists one by one.
    for elem in elements:
        plt.setp(bp[elem], color=color)


a = np.random.uniform(0, 10, [100, 5])

bp = plt.boxplot(a)
color_box(bp, 'red')

¡Saludos!


Una forma sencilla sería usar pandas. Adapté un ejemplo de la documentación de gráficos:

import numpy as np
import pandas as pd

# Twelve rows of random values for the two fruits, four rows per category.
df = pd.DataFrame(np.random.rand(12, 2), columns=['Apples', 'Oranges'])
df['Categories'] = pd.Series(list('AAAABBBBCCCC'))

# NOTE(review): the original session also set
# ``pd.options.display.mpl_style = 'default'``. That option appears to have
# been removed from later pandas releases, so it is omitted here; confirm
# against the pandas version in use.

# Draws one subplot per value column, with one box per category. In the
# original IPython session the call returned an array holding two
# ``matplotlib.axes.AxesSubplot`` objects.
df.boxplot(by='Categories')