sql - Unir intervalos de validez de fecha consecutivos

oracle oracle11g (4)

Este es un conjunto bastante complicado de pasos, pero es la forma en que resolví un problema similar:

-- Sample Data CREATE TABLE AdjacentValidity ( RowID INT IDENTITY(1,1) NOT NULL, Product VARCHAR(1) NOT NULL, Start_Date DATETIME NOT NULL, End_Date DATETIME NOT NULL ) INSERT INTO AdjacentValidity (Product, Start_Date, End_Date) SELECT ''A'', ''7/1/2013'', ''7/31/2013'' UNION SELECT ''A'', ''8/1/2013'', ''8/31/2013'' UNION SELECT ''A'', ''9/1/2013'', ''9/30/2013'' UNION SELECT ''B'', ''10/1/2013'', ''10/31/2013'' UNION SELECT ''B'', ''11/1/2013'', ''11/30/2013'' UNION SELECT ''A'', ''12/1/2013'', ''12/31/2013'' UNION SELECT ''A'', ''1/1/2014'', ''1/31/2014'' UNION SELECT ''A'', ''2/1/2014'', ''2/28/2014'' UNION SELECT ''A'', ''3/1/2014'', ''3/31/2014'' -- Modify the sample data to include necessary tags CREATE TABLE #RawData ( RawData_ID INT IDENTITY(1,1) NOT NULL, Product VARCHAR(1) NOT NULL, Start_Date DATETIME NOT NULL, End_Date DATETIME NOT NULL, isFirstOccurrence BIT NULL, isLastOccurrence BIT NULL, isFirstInstance BIT NULL, isLastInstance BIT NULL ) -- Load and flag first occurrences of a natural key INSERT INTO #RawData ( Product, Start_Date, End_Date, isFirstInstance ) SELECT Product, Start_Date, End_Date, CASE WHEN ROW_NUMBER() OVER ( --PARTITION BY <NaturalKey> ORDER BY Start_date ) = 1 THEN 1 ELSE 0 END AS isFirstOccurrence FROM AdjacentValidity -- update to flag the last sequential instance of a particalar data set, and the last occurrence of a natural key UPDATE a SET a.isLastInstance = CASE WHEN a.Product <> b.Product OR DATEADD(m, 1, a.Start_Date) <> b.Start_Date OR b.RawData_ID IS NULL THEN 1 ELSE 0 END, a.isLastOccurrence = CASE WHEN b.RawData_ID IS NULL THEN 1 ELSE 0 END FROM #RawData a LEFT JOIN #RawData b ON b.RawData_ID = a.RawData_ID + 1 --AND --b.<NaturalKey> = a.<NaturalKey> -- flag first sequential instance of a particular data set UPDATE b SET b.isFirstInstance = CASE WHEN a.isLastInstance = 1 THEN 1 ELSE 0 END FROM #RawData a LEFT JOIN #RawData b ON b.RawData_ID = a.RawData_ID + 1 --AND --b.<NaturalKey> = a.<NaturalKey> -- reduce the records to only those that are the first or last occurrence of a particular data set CREATE TABLE #UniqueData ( [UniqueData_ID] [int] IDENTITY(1,1) NOT NULL, Start_Date DATETIME NOT NULL, End_Date DATETIME NOT NULL, Product VARCHAR(1) NULL, isFirstOccurrence BIT NULL, isLastOccurrence BIT NULL, isFirstInstance BIT NULL, isLastInstance BIT NULL ) INSERT INTO #UniqueData ( Start_Date, End_Date, Product, isFirstOccurrence, isLastOccurrence, isFirstInstance, isLastInstance ) SELECT Start_Date, End_Date, Product, isFirstOccurrence, isLastOccurrence, isFirstInstance, isLastInstance FROM #RawData WHERE isFirstOccurrence = 1 OR isFirstInstance = 1 OR isLastInstance = 1 ORDER BY RawData_ID, Start_Date -- combine the first and last occurrences in any given sequence into a single row SELECT a.Start_Date, ISNULL(b.Start_Date, a.End_Date) End_Date, a.Product FROM #UniqueData a LEFT JOIN #UniqueData b ON b.UniqueData_ID = a.UniqueData_ID + 1 AND --b.<NaturalKey> = a.<NaturalKey> AND a.isLastInstance <> 1 WHERE a.isFirstInstance = 1 or a.isFirstOccurrence = 1 ORDER BY a.UniqueData_ID -- clean up /* DROP TABLE AdjacentValidity DROP TABLE #RawData DROP TABLE #UniqueData */

Tengo una serie de registros que contienen cierta información (tipo de producto) con validez temporal.

Me gustaría fusionar los intervalos de validez adyacentes, siempre que la información de agrupación (el tipo de producto) se mantenga igual. No puedo usar un simple GROUP BY con MIN y MAX , porque algunos tipos de productos ( A , en el ejemplo) pueden "desaparecer" y "regresar".

Usando Oracle 11g.

Una pregunta similar para MySQL es: ¿Cómo puedo hacer un grupo contiguo en MySQL?

Datos de entrada :

| PRODUCT | START_DATE | END_DATE | |---------|----------------------------------|----------------------------------| | A | July, 01 2013 00:00:00+0000 | July, 31 2013 00:00:00+0000 | | A | August, 01 2013 00:00:00+0000 | August, 31 2013 00:00:00+0000 | | A | September, 01 2013 00:00:00+0000 | September, 30 2013 00:00:00+0000 | | B | October, 01 2013 00:00:00+0000 | October, 31 2013 00:00:00+0000 | | B | November, 01 2013 00:00:00+0000 | November, 30 2013 00:00:00+0000 | | A | December, 01 2013 00:00:00+0000 | December, 31 2013 00:00:00+0000 | | A | January, 01 2014 00:00:00+0000 | January, 31 2014 00:00:00+0000 | | A | February, 01 2014 00:00:00+0000 | February, 28 2014 00:00:00+0000 | | A | March, 01 2014 00:00:00+0000 | March, 31 2014 00:00:00+0000 |

Resultados esperados :

| PRODUCT | START_DATE | END_DATE | |---------|---------------------------------|----------------------------------| | A | July, 01 2013 00:00:00+0000 | September, 30 2013 00:00:00+0000 | | B | October, 01 2013 00:00:00+0000 | November, 30 2013 00:00:00+0000 | | A | December, 01 2013 00:00:00+0000 | March, 31 2014 00:00:00+0000 |

Vea el completo SQL Fiddle .

Este es un problema de lagunas e islas. Hay varias formas de abordarlo; esto usa funciones analíticas de lead y lag :

select distinct product, case when start_date is null then lag(start_date) over (partition by product order by rn) else start_date end as start_date, case when end_date is null then lead(end_date) over (partition by product order by rn) else end_date end as end_date from ( select product, start_date, end_date, rn from ( select t.product, case when lag(end_date) over (partition by product order by start_date) is null or lag(end_date) over (partition by product order by start_date) != start_date - 1 then start_date end as start_date, case when lead(start_date) over (partition by product order by start_date) is null or lead(start_date) over (partition by product order by start_date) != end_date + 1 then end_date end as end_date, row_number() over (partition by product order by start_date) as rn from t ) where start_date is not null or end_date is not null ) order by start_date, product; PRODUCT START_DATE END_DATE ------- ---------- --------- A 01-JUL-13 30-SEP-13 B 01-OCT-13 30-NOV-13 A 01-DEC-13 31-MAR-14

SQL Fiddle

La consulta más interna examina los registros anteriores y siguientes para el producto, y solo retiene la hora de inicio y / o finalización si los registros no son contiguos:

select t.product, case when lag(end_date) over (partition by product order by start_date) is null or lag(end_date) over (partition by product order by start_date) != start_date - 1 then start_date end as start_date, case when lead(start_date) over (partition by product order by start_date) is null or lead(start_date) over (partition by product order by start_date) != end_date + 1 then end_date end as end_date from t; PRODUCT START_DATE END_DATE ------- ---------- --------- A 01-JUL-13 A A 30-SEP-13 A 01-DEC-13 A A A 31-MAR-14 B 01-OCT-13 B 30-NOV-13

El siguiente nivel de selección elimina aquellos que están en el punto medio, donde ambas fechas fueron borradas por la consulta interna, lo que da:

PRODUCT START_DATE END_DATE ------- ---------- --------- A 01-JUL-13 A 30-SEP-13 A 01-DEC-13 A 31-MAR-14 B 01-OCT-13 B 30-NOV-13

La consulta externa luego colapsa esos pares adyacentes; He utilizado la ruta fácil de crear duplicados y luego eliminarlos con distinct , pero puedes hacerlo de otras maneras, como poner ambos valores en uno de los pares de filas y dejar ambos valores en el otro nulo, y luego eliminarlos con otra capa de seleccionar, pero creo que distinto está bien aquí.

Si su caso de uso en el mundo real tiene tiempos, no solo fechas, entonces tendrá que ajustar la comparación en la consulta interna; en lugar de +/- 1, un intervalo de 1 segundo quizás, o 1/86400 si lo prefiere, pero depende de la precisión de sus valores.

Parece que debería haber una manera más fácil, pero una combinación de una consulta analítica (para encontrar los diferentes espacios) y una consulta jerárquica (para conectar las filas que son continuas) funciona:

with data as ( select ''A'' product, to_date(''7/1/2013'', ''MM/DD/YYYY'') start_date, to_date(''7/31/2013'', ''MM/DD/YYYY'') end_date from dual union all select ''A'' product, to_date(''8/1/2013'', ''MM/DD/YYYY'') start_date, to_date(''8/31/2013'', ''MM/DD/YYYY'') end_date from dual union all select ''A'' product, to_date(''9/1/2013'', ''MM/DD/YYYY'') start_date, to_date(''9/30/2013'', ''MM/DD/YYYY'') end_date from dual union all select ''B'' product, to_date(''10/1/2013'', ''MM/DD/YYYY'') start_date, to_date(''10/31/2013'', ''MM/DD/YYYY'') end_date from dual union all select ''B'' product, to_date(''11/1/2013'', ''MM/DD/YYYY'') start_date, to_date(''11/30/2013'', ''MM/DD/YYYY'') end_date from dual union all select ''A'' product, to_date(''12/1/2013'', ''MM/DD/YYYY'') start_date, to_date(''12/31/2013'', ''MM/DD/YYYY'') end_date from dual union all select ''A'' product, to_date(''1/1/2014'', ''MM/DD/YYYY'') start_date, to_date(''1/31/2014'', ''MM/DD/YYYY'') end_date from dual union all select ''A'' product, to_date(''2/1/2014'', ''MM/DD/YYYY'') start_date, to_date(''2/28/2014'', ''MM/DD/YYYY'') end_date from dual union all select ''A'' product, to_date(''3/1/2014'', ''MM/DD/YYYY'') start_date, to_date(''3/31/2014'', ''MM/DD/YYYY'') end_date from dual ), start_points as ( select product, start_date, end_date, prior_end+1, case when prior_end + 1 = start_date then null else ''Y'' end start_point from ( select product, start_date, end_date, lag(end_date,1) over (partition by product order by end_date) prior_end from data ) ) select product, min(start_date) start_date, max(end_date) end_date from ( select product, start_date, end_date, level, connect_by_root(start_date) root_start from start_points start with start_point = ''Y'' connect by prior end_date = start_date - 1 and prior product = product ) group by product, root_start; PRODUCT START_DATE END_DATE ------- ---------- --------- A 01-JUL-13 30-SEP-13 A 01-DEC-13 31-MAR-14 B 01-OCT-13 30-NOV-13

Pruebe algo como:

with dat as ( select ''A'' as product, sysdate-3 as start_dte, sysdate-2 as end_dte from dual union all select ''A'' as product, sysdate-2 as start_dte, sysdate-1 as end_dte from dual union all select ''B'' as product, sysdate-5 as start_dte, sysdate-4 as end_dte from dual ) SELECT product, MIN(start_dte) KEEP (DENSE_RANK FIRST ORDER BY start_dte) "Start", MAX(end_dte) KEEP (DENSE_RANK LAST ORDER BY end_dte) "End" FROM dat GROUP BY product ORDER BY product;

Salida

PRODUCT Start End A 2/24/2014 10:25:53 AM 2/26/2014 10:25:53 AM B 2/22/2014 10:25:53 AM 2/23/2014 10:25:53 AM