sql - Unir intervalos de validez de fecha consecutivos
oracle oracle11g (4)
Este es un conjunto bastante complicado de pasos, pero es la forma en que resolví un problema similar:
-- Sample Data
CREATE TABLE AdjacentValidity
(
RowID INT IDENTITY(1,1) NOT NULL,
Product VARCHAR(1) NOT NULL,
Start_Date DATETIME NOT NULL,
End_Date DATETIME NOT NULL
)
INSERT INTO AdjacentValidity (Product, Start_Date, End_Date)
SELECT ''A'', ''7/1/2013'', ''7/31/2013'' UNION
SELECT ''A'', ''8/1/2013'', ''8/31/2013'' UNION
SELECT ''A'', ''9/1/2013'', ''9/30/2013'' UNION
SELECT ''B'', ''10/1/2013'', ''10/31/2013'' UNION
SELECT ''B'', ''11/1/2013'', ''11/30/2013'' UNION
SELECT ''A'', ''12/1/2013'', ''12/31/2013'' UNION
SELECT ''A'', ''1/1/2014'', ''1/31/2014'' UNION
SELECT ''A'', ''2/1/2014'', ''2/28/2014'' UNION
SELECT ''A'', ''3/1/2014'', ''3/31/2014''
-- Modify the sample data to include necessary tags
CREATE TABLE #RawData
(
RawData_ID INT IDENTITY(1,1) NOT NULL,
Product VARCHAR(1) NOT NULL,
Start_Date DATETIME NOT NULL,
End_Date DATETIME NOT NULL,
isFirstOccurrence BIT NULL,
isLastOccurrence BIT NULL,
isFirstInstance BIT NULL,
isLastInstance BIT NULL
)
-- Load and flag first occurrences of a natural key
INSERT INTO #RawData
(
Product,
Start_Date,
End_Date,
isFirstInstance
)
SELECT
Product,
Start_Date,
End_Date,
CASE WHEN ROW_NUMBER() OVER
(
--PARTITION BY <NaturalKey>
ORDER BY Start_date
) = 1 THEN 1 ELSE 0 END AS isFirstOccurrence
FROM AdjacentValidity
-- update to flag the last sequential instance of a particalar data set, and the last occurrence of a natural key
UPDATE a
SET
a.isLastInstance =
CASE
WHEN
a.Product <> b.Product OR
DATEADD(m, 1, a.Start_Date) <> b.Start_Date OR
b.RawData_ID IS NULL
THEN 1
ELSE 0
END,
a.isLastOccurrence =
CASE
WHEN
b.RawData_ID IS NULL
THEN 1
ELSE 0
END
FROM
#RawData a
LEFT JOIN
#RawData b ON
b.RawData_ID = a.RawData_ID + 1 --AND
--b.<NaturalKey> = a.<NaturalKey>
-- flag first sequential instance of a particular data set
UPDATE b
SET
b.isFirstInstance =
CASE
WHEN
a.isLastInstance = 1
THEN 1
ELSE 0
END
FROM
#RawData a
LEFT JOIN
#RawData b ON
b.RawData_ID = a.RawData_ID + 1 --AND
--b.<NaturalKey> = a.<NaturalKey>
-- reduce the records to only those that are the first or last occurrence of a particular data set
CREATE TABLE #UniqueData
(
[UniqueData_ID] [int] IDENTITY(1,1) NOT NULL,
Start_Date DATETIME NOT NULL,
End_Date DATETIME NOT NULL,
Product VARCHAR(1) NULL,
isFirstOccurrence BIT NULL,
isLastOccurrence BIT NULL,
isFirstInstance BIT NULL,
isLastInstance BIT NULL
)
INSERT INTO #UniqueData
(
Start_Date,
End_Date,
Product,
isFirstOccurrence,
isLastOccurrence,
isFirstInstance,
isLastInstance
)
SELECT
Start_Date,
End_Date,
Product,
isFirstOccurrence,
isLastOccurrence,
isFirstInstance,
isLastInstance
FROM
#RawData
WHERE
isFirstOccurrence = 1 OR
isFirstInstance = 1 OR
isLastInstance = 1
ORDER BY RawData_ID, Start_Date
-- combine the first and last occurrences in any given sequence into a single row
SELECT
a.Start_Date,
ISNULL(b.Start_Date, a.End_Date) End_Date,
a.Product
FROM
#UniqueData a
LEFT JOIN
#UniqueData b ON
b.UniqueData_ID = a.UniqueData_ID + 1 AND
--b.<NaturalKey> = a.<NaturalKey> AND
a.isLastInstance <> 1
WHERE a.isFirstInstance = 1 or a.isFirstOccurrence = 1
ORDER BY a.UniqueData_ID
-- clean up
/*
DROP TABLE AdjacentValidity
DROP TABLE #RawData
DROP TABLE #UniqueData
*/
Tengo una serie de registros que contienen cierta información (tipo de producto) con validez temporal.
Me gustaría fusionar los intervalos de validez adyacentes, siempre que la información de agrupación (el tipo de producto) se mantenga igual. No puedo usar un simple GROUP BY
con MIN
y MAX
, porque algunos tipos de productos ( A
, en el ejemplo) pueden "desaparecer" y "regresar".
Usando Oracle 11g.
Una pregunta similar para MySQL es: ¿Cómo puedo hacer un grupo contiguo en MySQL?
| PRODUCT | START_DATE | END_DATE |
|---------|----------------------------------|----------------------------------|
| A | July, 01 2013 00:00:00+0000 | July, 31 2013 00:00:00+0000 |
| A | August, 01 2013 00:00:00+0000 | August, 31 2013 00:00:00+0000 |
| A | September, 01 2013 00:00:00+0000 | September, 30 2013 00:00:00+0000 |
| B | October, 01 2013 00:00:00+0000 | October, 31 2013 00:00:00+0000 |
| B | November, 01 2013 00:00:00+0000 | November, 30 2013 00:00:00+0000 |
| A | December, 01 2013 00:00:00+0000 | December, 31 2013 00:00:00+0000 |
| A | January, 01 2014 00:00:00+0000 | January, 31 2014 00:00:00+0000 |
| A | February, 01 2014 00:00:00+0000 | February, 28 2014 00:00:00+0000 |
| A | March, 01 2014 00:00:00+0000 | March, 31 2014 00:00:00+0000 |
| PRODUCT | START_DATE | END_DATE |
|---------|---------------------------------|----------------------------------|
| A | July, 01 2013 00:00:00+0000 | September, 30 2013 00:00:00+0000 |
| B | October, 01 2013 00:00:00+0000 | November, 30 2013 00:00:00+0000 |
| A | December, 01 2013 00:00:00+0000 | March, 31 2014 00:00:00+0000 |
Vea el completo SQL Fiddle .
Este es un problema de lagunas e islas. Hay varias formas de abordarlo; esto usa funciones analíticas de lead
y lag
:
select distinct product,
case when start_date is null then lag(start_date)
over (partition by product order by rn) else start_date end as start_date,
case when end_date is null then lead(end_date)
over (partition by product order by rn) else end_date end as end_date
from (
select product, start_date, end_date, rn
from (
select t.product,
case when lag(end_date)
over (partition by product order by start_date) is null
or lag(end_date)
over (partition by product order by start_date) != start_date - 1
then start_date end as start_date,
case when lead(start_date)
over (partition by product order by start_date) is null
or lead(start_date)
over (partition by product order by start_date) != end_date + 1
then end_date end as end_date,
row_number() over (partition by product order by start_date) as rn
from t
)
where start_date is not null or end_date is not null
)
order by start_date, product;
PRODUCT START_DATE END_DATE
------- ---------- ---------
A 01-JUL-13 30-SEP-13
B 01-OCT-13 30-NOV-13
A 01-DEC-13 31-MAR-14
La consulta más interna examina los registros anteriores y siguientes para el producto, y solo retiene la hora de inicio y / o finalización si los registros no son contiguos:
select t.product,
case when lag(end_date)
over (partition by product order by start_date) is null
or lag(end_date)
over (partition by product order by start_date) != start_date - 1
then start_date end as start_date,
case when lead(start_date)
over (partition by product order by start_date) is null
or lead(start_date)
over (partition by product order by start_date) != end_date + 1
then end_date end as end_date
from t;
PRODUCT START_DATE END_DATE
------- ---------- ---------
A 01-JUL-13
A
A 30-SEP-13
A 01-DEC-13
A
A
A 31-MAR-14
B 01-OCT-13
B 30-NOV-13
El siguiente nivel de selección elimina aquellos que están en el punto medio, donde ambas fechas fueron borradas por la consulta interna, lo que da:
PRODUCT START_DATE END_DATE
------- ---------- ---------
A 01-JUL-13
A 30-SEP-13
A 01-DEC-13
A 31-MAR-14
B 01-OCT-13
B 30-NOV-13
La consulta externa luego colapsa esos pares adyacentes; He utilizado la ruta fácil de crear duplicados y luego eliminarlos con distinct
, pero puedes hacerlo de otras maneras, como poner ambos valores en uno de los pares de filas y dejar ambos valores en el otro nulo, y luego eliminarlos con otra capa de seleccionar, pero creo que distinto está bien aquí.
Si su caso de uso en el mundo real tiene tiempos, no solo fechas, entonces tendrá que ajustar la comparación en la consulta interna; en lugar de +/- 1, un intervalo de 1 segundo quizás, o 1/86400 si lo prefiere, pero depende de la precisión de sus valores.
Parece que debería haber una manera más fácil, pero una combinación de una consulta analítica (para encontrar los diferentes espacios) y una consulta jerárquica (para conectar las filas que son continuas) funciona:
with data as (
select ''A'' product, to_date(''7/1/2013'', ''MM/DD/YYYY'') start_date, to_date(''7/31/2013'', ''MM/DD/YYYY'') end_date from dual union all
select ''A'' product, to_date(''8/1/2013'', ''MM/DD/YYYY'') start_date, to_date(''8/31/2013'', ''MM/DD/YYYY'') end_date from dual union all
select ''A'' product, to_date(''9/1/2013'', ''MM/DD/YYYY'') start_date, to_date(''9/30/2013'', ''MM/DD/YYYY'') end_date from dual union all
select ''B'' product, to_date(''10/1/2013'', ''MM/DD/YYYY'') start_date, to_date(''10/31/2013'', ''MM/DD/YYYY'') end_date from dual union all
select ''B'' product, to_date(''11/1/2013'', ''MM/DD/YYYY'') start_date, to_date(''11/30/2013'', ''MM/DD/YYYY'') end_date from dual union all
select ''A'' product, to_date(''12/1/2013'', ''MM/DD/YYYY'') start_date, to_date(''12/31/2013'', ''MM/DD/YYYY'') end_date from dual union all
select ''A'' product, to_date(''1/1/2014'', ''MM/DD/YYYY'') start_date, to_date(''1/31/2014'', ''MM/DD/YYYY'') end_date from dual union all
select ''A'' product, to_date(''2/1/2014'', ''MM/DD/YYYY'') start_date, to_date(''2/28/2014'', ''MM/DD/YYYY'') end_date from dual union all
select ''A'' product, to_date(''3/1/2014'', ''MM/DD/YYYY'') start_date, to_date(''3/31/2014'', ''MM/DD/YYYY'') end_date from dual
),
start_points as
(
select product, start_date, end_date, prior_end+1, case when prior_end + 1 = start_date then null else ''Y'' end start_point
from (
select product, start_date, end_date, lag(end_date,1) over (partition by product order by end_date) prior_end
from data
)
)
select product, min(start_date) start_date, max(end_date) end_date
from (
select product, start_date, end_date, level, connect_by_root(start_date) root_start
from start_points
start with start_point = ''Y''
connect by prior end_date = start_date - 1
and prior product = product
)
group by product, root_start;
PRODUCT START_DATE END_DATE
------- ---------- ---------
A 01-JUL-13 30-SEP-13
A 01-DEC-13 31-MAR-14
B 01-OCT-13 30-NOV-13
Pruebe algo como:
with dat as (
select ''A'' as product, sysdate-3 as start_dte, sysdate-2 as end_dte from dual
union all
select ''A'' as product, sysdate-2 as start_dte, sysdate-1 as end_dte from dual
union all
select ''B'' as product, sysdate-5 as start_dte, sysdate-4 as end_dte from dual
)
SELECT product,
MIN(start_dte) KEEP (DENSE_RANK FIRST ORDER BY start_dte) "Start",
MAX(end_dte) KEEP (DENSE_RANK LAST ORDER BY end_dte) "End"
FROM dat
GROUP BY product
ORDER BY product;
Salida
PRODUCT Start End
A 2/24/2014 10:25:53 AM 2/26/2014 10:25:53 AM
B 2/22/2014 10:25:53 AM 2/23/2014 10:25:53 AM