asmile1993
2022-05-20 11:27:16 +08:00
-- 测试数据
drop table t;
create table t(
id int auto_increment primary key,
cust_id int,
trx_dte date,
trx_amt decimal(10, 2)
);
insert into t (cust_id, trx_dte, trx_amt) value(10086, '2022-01-05', 23.56);
insert into t (cust_id, trx_dte, trx_amt) value(10086, '2022-02-05', 23.56);
insert into t (cust_id, trx_dte, trx_amt) value(10086, '2022-03-05', 23.56);
insert into t (cust_id, trx_dte, trx_amt) value(10086, '2022-04-05', 23.56);
insert into t (cust_id, trx_dte, trx_amt) value(10085, '2022-01-05', 23.56);
insert into t (cust_id, trx_dte, trx_amt) value(10085, '2022-02-05', 23.56);
insert into t (cust_id, trx_dte, trx_amt) value(10010, '2021-01-05', 23.56);
insert into t (cust_id, trx_dte, trx_amt) value(10010, '2021-02-05', 13.56);
insert into t (cust_id, trx_dte, trx_amt) value(10010, '2021-03-05', 33.56);
insert into t (cust_id, trx_dte, trx_amt) value(10010, '2021-04-05', 53.56);
insert into t (cust_id, trx_dte, trx_amt) value(10010, '2021-05-05', 23.56);
insert into t (cust_id, trx_dte, trx_amt) value(10020, '2021-01-05', 23.56);
insert into t (cust_id, trx_dte, trx_amt) value(10020, '2021-02-05', 23.56);
insert into t (cust_id, trx_dte, trx_amt) value(10020, '2021-04-05', 23.56);
insert into t (cust_id, trx_dte, trx_amt) value(10020, '2021-05-05', 23.56);
insert into t (cust_id, trx_dte, trx_amt) value(10020, '2021-07-05', 23.56);
-- 递归调用生成 '2021-05-01' 到 '2022-05-30' 的日期表
with recursive Date_Ranges AS
(
select '2021-05-01' as Date
union all
select Date + interval 1 day
from Date_Ranges
where Date < '2022-05-30'
),
-- 生成月份相连,日期相同并且连续三个月的日期数组,例如:["2021-01-05", "2021-02-05", "2021-03-05"]
-- 注意这里限制了日期,因为过了 '2022-03-30' 后,就不再满足数据的过滤条件
date_array_ranges as (
select JSON_ARRAY(date, date_add(date, interval 1 month), date_add(date, interval 2 month)) json_array_trx_dte
from date_ranges
where date <= '2022-03-30'
),
-- 根据 cust_id ,trx_amt 进行聚合,并生成用户相同,金额相同的交易日期 json 数组
cust_trx_amt_dte_array as
(
select cust_id, trx_amt, JSON_ARRAYAGG(trx_dte) json_array_trx_dte
from t
group by cust_id, trx_amt
)
-- 由于是连续三个月,那么交易日期的 json 数组的数量肯定是大于等于 3
-- 在满足上述条件后,进一步判断交易日期 json 数组是否包含连续三个月日期相同的元素
-- 这里用 exists 来判断是为了避免数据重复
select *
from cust_trx_amt_dte_array a
where json_length(a.json_array_trx_dte) >= 3
and exists (select 1
from date_array_ranges b
where json_contains(a.json_array_trx_dte->'$', b.json_array_trx_dte)
);
-- 返回结果
+-----------+---------+----------------------------------------------------------+
| cust_id | trx_amt | json_array_trx_dte |
+-----------+---------+----------------------------------------------------------+
| 10086 | 23.56 | ["2022-01-05", "2022-02-05", "2022-03-05", "2022-04-05"] |
+---------------------+----------------------------------------------------------+