sql - Sum Over 窗口函数的意外结果

这一直让我发疯,非常感谢任何帮助。下面的代码和结果 - 我的问题很简单。在结果第 4 行,Activity 与 lag_Activity 不匹配,为什么组数没有增加?

create table #exampleTable 
    (name varchar(20)
    ,pnum bigint
    ,activity varchar(10)
    ,startTime datetime
    ,endTime datetime)
insert into #exampleTable
values 
    ('Harry Potter',12345678, 'On Shift', '2022-05-18 13:00:00', '2022-05-18 22:00:00')
    ,('Harry Potter',12345678,'Off Shift','2022-05-18 16:30:00','2022-05-18 17:30:00')
    ,('Jane Doe',98765432,'Off Shift','2022-05-18 02:00:00','2022-05-18 05:00:00')
    ,('Jane Doe',98765432,'On Shift','2022-05-18 02:00:00','2022-05-18 16:00:00')
    ,('Jane Doe',98765432,'Off Shift','2022-05-18 06:15:00','2022-05-18 06:45:00')
    ,('Jane Doe',98765432,'Off Shift','2022-05-18 11:30:00','2022-05-18 12:00:00')
    ,('Jane Doe',98765432,'Off Shift','2022-05-18 12:00:00','2022-05-18 15:50:00')

select 
    sum(case when activity = lag_activity and starttime <= lag_endtime then 0 else 1 end) over(partition by pnum order by pnum, starttime) as grp
    ,*
from (
    select 
        *
        ,lag(endtime) over(order by pnum, starttime) lag_endtime
        ,lag(activity)  over(order by pnum, starttime) lag_activity
    from #exampleTable
        where endtime-Starttime>0
    ) a
order by pnum, starttime

结果如下:

grp name            pnum        activity    lag_activity    startTime               endTime                 lag_endtime
1   Harry Potter    12345678    On Shift    NULL            2022-05-18 13:00:00.000 2022-05-18 22:00:00.000 NULL
2   Harry Potter    12345678    Off Shift   On Shift        2022-05-18 16:30:00.000 2022-05-18 17:30:00.000 2022-05-18 22:00:00.000
1   Jane Doe        98765432    Off Shift   Off Shift       2022-05-18 02:00:00.000 2022-05-18 05:00:00.000 2022-05-18 17:30:00.000
1   Jane Doe        98765432    On Shift    Off Shift       2022-05-18 02:00:00.000 2022-05-18 16:00:00.000 2022-05-18 05:00:00.000
2   Jane Doe        98765432    Off Shift   On Shift        2022-05-18 06:15:00.000 2022-05-18 06:45:00.000 2022-05-18 16:00:00.000
3   Jane Doe        98765432    Off Shift   Off Shift       2022-05-18 11:30:00.000 2022-05-18 12:00:00.000 2022-05-18 06:45:00.000
3   Jane Doe        98765432    Off Shift   Off Shift       2022-05-18 12:00:00.000 2022-05-18 15:50:00.000 2022-05-18 12:00:00.000

回答1

感谢@ConorCunninghamMSFT 的提示,滞后函数和 Sum Over 函数需要额外排序。订单本身不如它保持一致这一事实重要。

select 
    sum(case when activity = lag_activity and starttime <= lag_endtime then 0 else 1 end) over(partition by pnum order by pnum, starttime, endtime) as grp
    ,*
from (
    select 
        *
        ,lag(endtime) over(order by pnum, starttime, endtime) lag_endtime
        ,lag(activity)  over(order by pnum, starttime, endtime) lag_activity
    from #exampleTable
        where endtime-Starttime>0
    ) a
order by pnum, starttime, endtime

相似文章

最新文章