tutorial - puertos elasticsearch
Múltiples group-by (agrupación por varios campos) en Elasticsearch (2)
A partir de la versión 1.0 de ElasticSearch
, la nueva API de agregaciones permite la agrupación por múltiples campos, utilizando sub-agregaciones . Supongamos que desea agrupar por los campos field1
, field2
y field3
:
{
"aggs": {
"agg1": {
"terms": {
"field": "field1"
},
"aggs": {
"agg2": {
"terms": {
"field": "field2"
},
"aggs": {
"agg3": {
"terms": {
"field": "field3"
}
}
}
}
}
}
}
}
Por supuesto, esto puede continuar para tantos campos como desee.
Actualizar:
Para completar, así es como se ve el resultado de la consulta anterior. También debajo está el código python para generar la consulta de agregación y aplanar el resultado en una lista de diccionarios.
{
"aggregations": {
"agg1": {
"buckets": [{
"doc_count": <count>,
"key": <value of field1>,
"agg2": {
"buckets": [{
"doc_count": <count>,
"key": <value of field2>,
"agg3": {
"buckets": [{
"doc_count": <count>,
"key": <value of field3>
},
{
"doc_count": <count>,
"key": <value of field3>
}, ...
]
},
{
"doc_count": <count>,
"key": <value of field2>,
"agg3": {
"buckets": [{
"doc_count": <count>,
"key": <value of field3>
},
{
"doc_count": <count>,
"key": <value of field3>
}, ...
]
}, ...
]
},
{
"doc_count": <count>,
"key": <value of field1>,
"agg2": {
"buckets": [{
"doc_count": <count>,
"key": <value of field2>,
"agg3": {
"buckets": [{
"doc_count": <count>,
"key": <value of field3>
},
{
"doc_count": <count>,
"key": <value of field3>
}, ...
]
},
{
"doc_count": <count>,
"key": <value of field2>,
"agg3": {
"buckets": [{
"doc_count": <count>,
"key": <value of field3>
},
{
"doc_count": <count>,
"key": <value of field3>
}, ...
]
}, ...
]
}, ...
]
}
}
}
El siguiente código Python realiza el group-by sobre la lista de campos dada. Si se especifica include_missing=True
, también se incluyen las combinaciones de valores en las que faltan algunos de los campos (no es necesario si tiene la versión 2.0 de Elasticsearch, gracias a esto )
def group_by(es, fields, include_missing):
    """Run a nested terms aggregation over ``fields`` and flatten the result.

    Builds one ``terms`` aggregation per field, each nested inside the
    previous one, sends it through ``es.search`` and hands the
    ``aggregations`` part of the response to ``get_docs_from_agg_result``.

    Args:
        es: Object exposing ``search(body=...)`` whose return value has an
            ``'aggregations'`` key (presumably an Elasticsearch client --
            confirm against the caller).
        fields: Non-empty sequence of field names, outermost grouping first.
            An empty sequence raises ``IndexError``.
        include_missing: When true, a ``missing`` aggregation named
            ``<field>_missing`` is added next to every ``terms`` aggregation
            so that documents lacking a field are still counted.

    Returns:
        Whatever ``get_docs_from_agg_result`` returns for the response.
    """
    current_level_terms = {'terms': {'field': fields[0]}}
    agg_spec = {fields[0]: current_level_terms}

    if include_missing:
        current_level_missing = {'missing': {'field': fields[0]}}
        agg_spec[fields[0] + '_missing'] = current_level_missing

    for field in fields[1:]:
        next_level_terms = {'terms': {'field': field}}
        current_level_terms['aggs'] = {
            field: next_level_terms,
        }

        if include_missing:
            next_level_missing = {'missing': {'field': field}}
            current_level_terms['aggs'][field + '_missing'] = next_level_missing
            # The "missing" branch gets the very same child dicts as the
            # "terms" branch, so deeper levels attached later show up
            # under both branches.
            current_level_missing['aggs'] = {
                field: next_level_terms,
                field + '_missing': next_level_missing,
            }
            current_level_missing = next_level_missing

        current_level_terms = next_level_terms

    agg_result = es.search(body={'aggs': agg_spec})['aggregations']
    return get_docs_from_agg_result(agg_result, fields, include_missing)
def get_docs_from_agg_result(agg_result, fields, include_missing):
    """Flatten a nested aggregation result into a list of dicts.

    Args:
        agg_result: Mapping that contains ``fields[0]`` (a dict with a
            ``'buckets'`` list) and, when ``include_missing`` is true,
            ``fields[0] + '_missing'`` (a single bucket dict). Each bucket
            carries ``'doc_count'`` and, for nested levels, the same
            structure for the next field.
        fields: Non-empty sequence of field names, outermost level first.
        include_missing: Whether the ``<field>_missing`` bucket of every
            level is processed as well; its value is reported as ``None``
            because it has no ``'key'``.

    Returns:
        A list with one dict per combination of values. Each dict maps
        every field name to its bucket key and ``'doc_count'`` to the count
        of the innermost bucket. Innermost buckets with a zero count are
        dropped.
    """
    current_field = fields[0]
    # Copy the bucket list: appending the "missing" bucket to the original
    # would mutate the caller's response and duplicate that bucket if the
    # same result is flattened again.
    buckets = list(agg_result[current_field]['buckets'])
    if include_missing:
        buckets.append(agg_result[current_field + '_missing'])

    if len(fields) == 1:
        return [
            {
                current_field: bucket.get('key'),
                'doc_count': bucket['doc_count'],
            }
            for bucket in buckets if bucket['doc_count'] > 0
        ]

    result = []
    for bucket in buckets:
        records = get_docs_from_agg_result(bucket, fields[1:], include_missing)
        value = bucket.get('key')
        # Tag every flattened child record with this level's value.
        for record in records:
            record[current_field] = value
        result.extend(records)
    return result
Necesito agregar (agrupar-por) usando 3 campos en ES.
¿Puedo hacer eso en una sola consulta, o necesito usar facetas e iterar por cada columna?
Gracias
Puedes hacerlo de 2 maneras:
1) usando múltiples campos en un solo resultado de faceta:
ejemplo para facetas de campos individuales:
# Terms facet over a single field ("status"), limited to documents whose
# billing_name matches "shohi*".
curl -X GET "http://localhost:9200/sales/order/_search?pretty=true" -d '{
  "query": {
    "query_string": {
      "query": "shohi*",
      "fields": [
        "billing_name"
      ]
    }
  },
  "facets": {
    "facet_result": {
      "terms": {
        "fields": [
          "status"
        ],
        "order": "term",
        "size": 15
      }
    }
  }
}'
ejemplo para campo múltiple en un solo resultado de faceta:
# One terms facet computed over three fields at once ("status",
# "customer_gender", "state"), limited to documents whose billing_name
# matches "shohi*".
curl -X GET "http://localhost:9200/sales/order/_search?pretty=true" -d '{
  "query": {
    "query_string": {
      "query": "shohi*",
      "fields": [
        "billing_name"
      ]
    }
  },
  "facets": {
    "facet_result": {
      "terms": {
        "fields": [
          "status",
          "customer_gender",
          "state"
        ],
        "order": "term",
        "size": 15
      }
    }
  }
}'
2) Usar conjunto de resultados de múltiples facetas:
# Three independent terms facets (one per field) requested in a single
# search, each returned as its own facet result.
curl -X GET "http://localhost:9200/sales/order/_search?pretty=true" -d '{
  "query": {
    "query_string": {
      "query": "*",
      "fields": [
        "increment_id"
      ]
    }
  },
  "facets": {
    "status_facets": {
      "terms": {
        "fields": [
          "status"
        ],
        "size": 50,
        "order": "term"
      }
    },
    "gender_facets": {
      "terms": {
        "fields": [
          "customer_gender"
        ]
      }
    },
    "state_facets": {
      "terms": {
        "fields": [
          "state"
        ],
        "order": "term"
      }
    }
  }
}'
Enlace de referencia: http://www.elasticsearch.org/guide/reference/api/search/facets/terms-facet.html