-- Exam scores: one row per student (sid) with the student's class and score.
-- The source file is space-delimited text.
create table score (
    sid   string,
    class string,
    score int
)
row format delimited
fields terminated by ' ';

load data local inpath '/temp/sql.txt' into table score;

s1 A 89
s2 C 88
s3 A 92
s4 A 89
s5 B 90
s6 B 86
s7 C 92
s8 C 90
s9 A 85
s10 C 86
s11 B 86
s12 B 86
-- Top three score levels per class.
-- dense_rank gives tied scores the same rank without gaps, so a class
-- may return more than three rows when scores tie.
with ranked as (
    select
        class,
        sid,
        score,
        dense_rank() over (partition by class order by score desc) as rank
    from score
)
select *
from ranked t1
where t1.rank <= 3;

t1.class	t1.sid	t1.score	t1.rank
A	s3	92	1
A	s4	89	2
A	s1	89	2
A	s9	85	3
B	s5	90	1
B	s11	86	2
B	s12	86	2
B	s6	86	2
C	s7	92	1
C	s8	90	2
C	s2	88	3



-- Movies with their categories; a movie's categories are stored in one
-- string separated by '、'.
create table movie (
    movie_name string,
    category   string
)
row format delimited
fields terminated by ' ';

load data local inpath '/temp/sql.txt' into table movie;

让子弹飞 动作、年代
长江七号 科幻
大进军 战争
-- One output row per (movie, category): split the category string on '、'
-- and explode the resulting array through a lateral view.
select
    movie_name,
    category_name
from movie
lateral view explode(split(category, '、')) t1 as category_name;

movie_name	category_name
让子弹飞	动作
让子弹飞	年代
长江七号	科幻
大进军	战争



-- People with their age, loaded from space-delimited text.
create table person (
    name string,
    age  int
)
row format delimited
fields terminated by ' ';

load data local inpath '/temp/sql.txt' into table person;

A 20
B 18
C 20
D 24
-- One row per age: gather the distinct names of that age and join them
-- into a single '|'-separated string.
select
    concat_ws('|', collect_set(name)) as name_list,
    age
from person
group by age;

name_list	age
B	18
A|C	20
D	24


-- Yearly score of each class, loaded from space-delimited text.
create table game (
    year  int,
    class string,
    score int
)
row format delimited
fields terminated by ' ';

load data local inpath '/temp/sql.txt' into table game;

2020 ClassA 10
2020 ClassB 12
2020 ClassC 9
2021 ClassA 12
2021 ClassB 8
-- Rows-to-columns pivot: one row per year, one score column per class.
-- A class without a row in a given year produces NULL in its column.
select
    year,
    max(if(class = 'ClassA', score, null)) as A,
    max(if(class = 'ClassB', score, null)) as B,
    max(if(class = 'ClassC', score, null)) as C
from game
group by year;

year	a	b	c
2020	10	12	9
2021	12	8	NULL


-- Daily login log per user; the streak query on this table treats
-- status = 1 as "logged in on that day".
-- The sample rows for this table are tab-separated, so the field delimiter
-- has to be '\t' (a ' ' delimiter would load every column as NULL).
create table login (
    uid    int,
    dt     date,
    status int
)
row format delimited
fields terminated by '\t';

load data local inpath '/temp/sql.txt' into table login;

1	2022-03-01	0
1	2022-03-02	1
1	2022-03-03	1
1	2022-03-04	1
1	2022-03-05	1
1	2022-03-06	0
1	2022-03-07	1
2	2022-03-01	0
2	2022-03-02	1
2	2022-03-03	1
2	2022-03-04	0
2	2022-03-05	0
2	2022-03-06	1
2	2022-03-07	1
3	2022-03-01	1
3	2022-03-02	1
3	2022-03-03	1
3	2022-03-04	0
3	2022-03-05	1
3	2022-03-06	1
3	2022-03-07	1
-- Streaks of at least three consecutive login days per user.
with login_day as (
    -- Keep only the days on which the user logged in. Subtracting the
    -- per-user row number from the date gives the same anchor date (diff)
    -- for every day of one consecutive run.
    select
        uid,
        dt,
        date_sub(dt, row_number() over (partition by uid order by dt asc)) as diff
    from login
    where status = 1
),
streak as (
    -- One row per (user, anchor date) = one consecutive run.
    select
        t1.uid,
        min(t1.dt) as start_day,
        count(*)   as day_count
    from login_day t1
    group by t1.uid, t1.diff
)
select *
from streak t2
where t2.day_count >= 3; -- keep runs of at least three consecutive days

t2.uid	t2.start_day	t2.day_count
1	2022-03-02	4
3	2022-03-01	3
3	2022-03-05	3


-- Champion team of each year, loaded from space-delimited text.
create table champion (
    year int,
    team string
)
row format delimited
fields terminated by ' ';

load data local inpath '/temp/sql.txt' into table champion;

2000 Sun
2001 Lakers
2002 Rockets
2003 Rockets
2004 Rockets
2005 Spurs
2006 Spurs
2007 Sun
2008 Lakers
2009 Lakers
2010 Lakers
2011 Lakers
2012 Warriors
2013 Warriors
2014 Warriors
2015 Heat
2016 Warriors
2017 Cavaliers
2018 Warriors
2019 Sun
2020 Warriors
2021 Warriors
2022 Raptors
-- Teams that won the championship at least three years in a row.
-- Within one team, year - row_number() stays constant across a run of
-- consecutive years, so (team, diff) identifies one run.
select
    t1.team,
    min(t1.year) as start_year,
    count(*)     as count
from (
    select
        year,
        team,
        year - row_number() over (partition by team order by year asc) as diff
    from champion
) t1
group by t1.team, t1.diff -- group by team and anchor year
-- Filter on the aggregate itself rather than on the alias "count", which
-- has the same name as the aggregate function and is easy to misread.
having count(*) >= 3; -- keep runs of at least three consecutive titles

t1.team	start_year	count
Lakers	2008	4
Rockets	2002	3
Warriors	2012	3


-- Login days per user for the "longest streak allowing a gap" exercise.
-- A table named login with a different schema (uid, dt, status) is created
-- earlier in this file, so drop it first; otherwise this create fails.
drop table if exists login;

create table login (
    id int,
    dt date
)
row format delimited
fields terminated by ' ';

load data local inpath '/temp/sql.txt' into table login;

1001 2022-07-01
1002 2022-07-01
1003 2022-07-01
1001 2022-07-02
1002 2022-07-03
1001 2022-07-04
1002 2022-07-05
1003 2022-07-05
1001 2022-07-07
1002 2022-07-07
1003 2022-07-08
1001 2022-07-09
1002 2022-07-09
-- Longest login streak per user, where a streak continues as long as two
-- successive login dates are at most two days apart.
with with_prev as (
    -- Previous login date of the same user (NULL for the first login).
    select
        id,
        dt,
        lag(dt) over (partition by id order by dt asc) as pre_dt
    from login
),
with_gap as (
    -- Days since the previous login; the first login counts as gap 0.
    select
        id,
        dt,
        if(pre_dt is null, 0, datediff(dt, pre_dt)) as diff
    from with_prev
),
with_segment as (
    -- Running count of "gap larger than two days" events: every such gap
    -- starts a new segment, so flag is a per-user segment number.
    select
        id,
        dt,
        sum(if(diff > 2, 1, 0)) over (partition by id order by dt asc) as flag
    from with_gap
),
segment_span as (
    -- Length of each segment in calendar days, first to last login inclusive.
    select
        id,
        datediff(max(dt), min(dt)) + 1 as day_count
    from with_segment
    group by id, flag
)
select
    id,
    max(day_count) as day_count
from segment_span
group by id;

id	day_count
1001	4
1002	9
1003	1


-- Stock price snapshots: one row per stock and time of day (HH:mm string).
create table price (
    stock string,
    time  string,
    price int
)
row format delimited
fields terminated by ' ';

load data local inpath '/temp/sql.txt' into table price;

A1 06:00 12
A1 09:00 16
A1 12:00 24
A1 15:00 17
A1 18:00 11
A1 21:00 13
B1 06:00 18
B1 09:00 12
B1 12:00 13
B1 15:00 13
B1 18:00 15
B1 21:00 17
C1 06:00 12
C1 09:00 13
C1 12:00 15
C1 15:00 17
C1 18:00 18
C1 21:00 20
-- Local peaks ('top') and valleys ('down') of each stock's price series.
-- The two result sets must be combined with union all; without it the
-- derived table is not valid SQL.
with neighbor as (
    -- Each price together with the price at the previous and next time point
    -- of the same stock (NULL at the ends of the series).
    select
        stock,
        time,
        price,
        lag(price)  over (partition by stock order by time asc) as previous,
        lead(price) over (partition by stock order by time asc) as next
    from price
)
select * from (
    select t1.stock, t1.time, t1.price, 'top' as top
    from neighbor t1
    where t1.price > t1.previous and t1.price > t1.next -- strictly above both neighbours: peak

    union all

    select t2.stock, t2.time, t2.price, 'down' as top
    from neighbor t2
    where t2.price < t2.previous and t2.price < t2.next -- strictly below both neighbours: valley
) t3;

t3.stock	t3.time	t3.price	t3.top
A1	12:00	24	top
A1	18:00	11	down
B1	09:00	12	down


-- Click log: one row per click with the user id and an integer click time.
create table click (
    id   string,
    time int
)
row format delimited
fields terminated by ' ';

load data local inpath '/temp/sql.txt' into table click;

a 1001
a 1005
a 1020
a 1048
a 1078
a 1230
a 1245
a 1270
a 1282
b 1101
b 1132
b 1156
b 1180
b 1200
b 1230
b 1345
b 1370
b 1400
-- Split each user's clicks into sessions: a click that is the user's first,
-- or comes more than 30 time units after the previous one, opens a new session.
-- Output per session: start time, number of clicks, and duration.
with with_gap as (
    -- Time since the user's previous click (NULL for the first click).
    select
        id,
        time,
        time - lag(time) over (partition by id order by time asc) as diff
    from click
),
with_break as (
    -- value = 1 marks the first click of a session.
    select
        id,
        time,
        diff,
        if(diff is null or diff > 30, 1, 0) as value
    from with_gap
),
with_stage as (
    -- Running sum of session starts = session number within the user.
    select
        t2.id,
        t2.time,
        sum(t2.value) over (partition by id order by time asc) as stage
    from with_break t2
)
select
    id,
    min(time)             as start_time,
    count(*)              as count,
    max(time) - min(time) as total_time
from with_stage
group by id, stage;

id	start_time	count	total_time
a	1001	5	77
a	1230	4	52
b	1101	1	0
b	1132	5	98
b	1345	3	55


-- Discount periods per brand, given as inclusive start and end dates.
create table discount (
    brand    int,
    start_dt date,
    end_dt   date
)
row format delimited
fields terminated by ' ';

load data local inpath '/temp/sql.txt' into table discount;

1001 2022-07-01 2022-07-03
1001 2022-07-05 2022-07-10
1002 2022-07-02 2022-07-08
1002 2022-07-06 2022-07-09
1003 2022-07-12 2022-07-20
1003 2022-07-15 2022-07-18
1004 2022-07-20 2022-07-25
1004 2022-07-22 2022-07-26
1004 2022-07-28 2022-07-30
-- Total number of discount days per brand, counting overlapping periods once.
with with_prev_end as (
    -- max_dt: latest end date among the brand's earlier periods, in
    -- (start_dt, end_dt) order; NULL for the brand's first period.
    select
        brand,
        start_dt,
        end_dt,
        max(end_dt) over (
            partition by brand
            order by start_dt asc, end_dt asc
            rows between unbounded preceding and 1 preceding
        ) as max_dt
    from discount
),
new_days as (
    -- Days this period adds on top of everything already covered.
    select
        brand,
        start_dt,
        end_dt,
        max_dt,
        case
            when max_dt is null or max_dt < start_dt
                then datediff(end_dt, start_dt) + 1 -- no overlap: the whole period counts
            when max_dt < end_dt
                then datediff(end_dt, max_dt)       -- partial overlap: only the days after max_dt
            else 0                                  -- fully covered by earlier periods
        end as count
    from with_prev_end
)
select
    brand,
    sum(count) as day_count
from new_days
group by brand;

brand	day_count
1001	9
1002	8
1003	9
1004	11


-- Live-streaming periods per streamer id, given as inclusive start and end dates.
create table live (
    id       int,
    start_dt date,
    end_dt   date
)
row format delimited
fields terminated by ' ';

load data local inpath '/temp/sql.txt' into table live;

1001 2022-07-01 2022-07-02
1001 2022-07-04 2022-07-05
1001 2022-07-07 2022-07-10
1001 2022-07-13 2022-07-18
1002 2022-07-01 2022-07-02
1002 2022-07-04 2022-07-05
1002 2022-07-07 2022-07-08
1002 2022-07-10 2022-07-11
1002 2022-07-13 2022-07-14
1002 2022-07-16 2022-07-17
1002 2022-07-19 2022-07-20
1003 2022-07-01 2022-07-20
1004 2022-07-04 2022-07-08
1004 2022-07-12 2022-07-16
1005 2022-07-03 2022-07-06
1005 2022-07-09 2022-07-11
1006 2022-07-04 2022-07-06
1007 2022-07-09 2022-07-12
1008 2022-07-06 2022-07-08
1008 2022-07-11 2022-07-13
1009 2022-07-06 2022-07-08
1009 2022-07-18 2022-07-19
1010 2022-07-11 2022-07-14
-- Number of streamers online on every date at which that number changes,
-- listed from the busiest date down.
-- Each period contributes +1 on its start date and -1 on the day after its
-- end date; the two event sets must be combined with union all (without it
-- the derived table is not valid SQL).
select dt, online
from (
    select
        dt,
        sum(count) over (order by dt asc) as online -- running total of the daily net change
    from (
        select dt, sum(value) as count -- net change on that date
        from (
            select id, start_dt as dt, 1 as value from live
            union all
            select id, date_add(end_dt, 1) as dt, -1 as value from live
        ) t1
        group by dt
    ) t2
) t3
order by online desc, dt asc;

dt	online
2022-07-04	6
2022-07-06	6
2022-07-07	6
2022-07-11	6
2022-07-13	6
2022-07-10	5
2022-07-12	5
2022-07-14	5
2022-07-09	4
2022-07-16	4
2022-07-01	3
2022-07-15	3
2022-07-17	3
2022-07-18	3
2022-07-19	3
2022-07-03	2
2022-07-20	2
2022-07-21	0


-- Integer intervals [start_time, end_time] to be merged when they overlap.
create table time_merge (
    id         int,
    start_time int,
    end_time   int
)
row format delimited
fields terminated by ' ';

load data local inpath '/temp/sql.txt' into table time_merge;

1 12 15
2 57 58
3 29 32
4 30 31
5 17 19
6 44 44
7 56 57
8 16 18
-- Merge overlapping intervals: one output row per merged interval.
-- The statement is now terminated with ';' like the other statements in this file.
with with_prev_end as (
    -- max_dt: largest end_time among all earlier intervals in
    -- (start_time, end_time) order; NULL for the first interval.
    select
        id,
        start_time,
        end_time,
        max(end_time) over (
            order by start_time asc, end_time asc
            rows between unbounded preceding and 1 preceding
        ) as max_dt
    from time_merge
),
with_break as (
    select
        id,
        start_time,
        end_time,
        case
            when max_dt is null or max_dt < start_time then 1 -- starts a new merged interval
            else 0                                            -- overlaps what came before
        end as count
    from with_prev_end
),
with_flag as (
    -- Running count of interval starts = number of the merged interval.
    select
        id,
        start_time,
        end_time,
        sum(count) over (order by start_time asc, end_time asc) as flag
    from with_break
)
select
    flag,
    min(start_time) as start_time,
    max(end_time)   as end_time
from with_flag
group by flag;

flag	start_time	end_time
1	12	15
2	16	19
3	29	32
4	44	44
5	56	58


-- Friend lists: one row per user id with a comma-separated list of friends.
create table common_friend (
    id      string,
    friends string
)
row format delimited
fields terminated by ' ';

load data local inpath '/temp/sql.txt' into table common_friend;

-- Normalised friend list: one row per (id, friend), obtained by splitting
-- the comma-separated friends column and exploding it.
create table friend as
select
    id,
    friend
from common_friend
lateral view explode(split(friends, ',')) temp as friend;

-- Common friends of every pair of users: one row per pair "id1,id2" with the
-- comma-separated list of friends both users have.
-- The statement is now terminated with ';' like the other statements in this file.
select
    t1.ids,
    concat_ws(',', collect_list(t1.friend)) as common_friend
from (
    select
        a.friend,
        concat(a.id, ',', b.id) as ids
    from friend a
    join friend b
        on a.friend = b.friend -- both users list the same friend
    where a.id < b.id -- keep each pair once; drops self-pairs and mirrored pairs
) t1
group by t1.ids;

t1.ids	common_friend


-- Friend lists for the "possible friends" exercise: one row per user id with
-- a comma-separated list of friends.
create table maybe_friend (
    id      string,
    friends string
)
row format delimited
fields terminated by ' ';

load data local inpath '/temp/sql.txt' into table maybe_friend;

-- Possible friends: pairs of users who share at least two friends but are
-- not listed as friends of each other.
-- Reads the maybe_friend table loaded for this exercise (the query used to
-- read common_friend), and the CTE gets its own name so it does not shadow
-- the physical table friend.
with friend_edge as (
    -- One row per (id, friend).
    select
        id,
        friend
    from maybe_friend
    lateral view explode(split(friends, ',')) temp as friend
)
select t2.id1, t2.id2
from (
    select t1.id1, t1.id2
    from (
        select a.id as id1, b.id as id2, a.friend
        from friend_edge a
        join friend_edge b
            on a.friend = b.friend
        where a.id < b.id -- keep each pair once
    ) t1
    group by t1.id1, t1.id2
    having count(t1.friend) >= 2 -- at least two friends in common
) t2
left join friend_edge e
    on t2.id1 = e.id
    and t2.id2 = e.friend
where e.id is null; -- drop pairs that are already friends, keep possible friends

t2.id1	t2.id2


-- Purchase log: one row per purchase with the user id and product number;
-- the same (id, product) pair can appear more than once.
create table shop (
    id      string,
    product int
)
row format delimited
fields terminated by ' ';

load data local inpath '/temp/sql.txt' into table shop;

A 1
A 2
A 1
A 3
B 2
B 3
B 4
B 5
B 2
C 1
C 2
C 1
D 1
D 3
D 6
-- Product recommendation: for each user, suggest products bought by similar
-- users (users sharing at least two distinct products with them) that the
-- user has not bought yet.
-- The statement is now terminated with ';' like the other statements in this file.
with temp as (
    -- Distinct (user, product) purchases.
    select distinct id, product
    from shop
),
similar_user as (
    -- Ordered pairs (id1, id2) of different users with at least two products in common.
    select
        a.id as id1,
        b.id as id2
    from temp a
    join temp b
        on a.product = b.product
        and a.id != b.id
    group by a.id, b.id
    having count(a.product) >= 2
),
candidate as (
    -- Every distinct product bought by a user similar to id1.
    select distinct
        s.id1,
        temp.product
    from similar_user s
    join temp
        on s.id2 = temp.id
)
select t4.id1 as id, t4.product
from candidate t4
left join temp
    on t4.product = temp.product
    and t4.id1 = temp.id -- the same user already bought this product
where temp.product is null; -- drop already-bought products, keep recommendations

id	t4.product
A	4
A	5
A	6
B	1
C	3
D	2



角色分配:活跃、新增 a、留存 b、留存 c、流失 d、沉默 e、回流 f

-- Login events: one row per user and login date, with the date stored as a
-- yyyyMMdd integer.
create table login_action (
    uid        string,
    login_date int
)
row format delimited
fields terminated by ' ';

load data local inpath '/temp/sql.txt' into table login_action;

d 20220321
e 20220321
f 20220321

a 20220322
b 20220322
d 20220322

a 20220323
b 20220323
c 20220323

a 20220324
b 20220324
c 20220324

a 20220325
b 20220325
c 20220325
f 20220325


-- Registry of users and the date each one was first seen (yyyyMMdd integer).
create table user_add (
    uid      string,
    add_date int
);

-- Seed the registry with everyone who logged in on 20220321.
-- Columns are listed explicitly so the insert does not depend on the column
-- order of login_action, and the select is written without surrounding
-- parentheses, the documented insert ... select form.
insert into table user_add
select
    uid,
    login_date
from login_action
where login_date = 20220321;

-- New users on 20220322: users who logged in that day and are not yet in user_add.
-- The statement is now terminated with ';' like the other statements in this file.
select
    t1.uid,
    t1.login_date
from login_action t1
left join user_add t2
    on t1.uid = t2.uid
where t1.login_date = 20220322 -- the day being analysed
  and t2.uid is null;          -- no match in user_add: first time this user is seen

t1.uid	t1.login_date
a	20220322
b	20220322

user_add.uid	user_add.add_date
d	20220321
e	20220321
f	20220321
a	20220322
b	20220322
c	20220323
-- Retained users on 20220324: users active that day who were added one or
-- two days earlier. The 1-day and 2-day sets are combined with union all.
-- NOTE: dates are yyyyMMdd integers, so "20220324 - 1" is plain integer
-- subtraction; it only yields a valid date while the result stays inside
-- the same month.
-- The statement is now terminated with ';' like the other statements in this file.
select t1.uid, t1.login_date, t2.add_date
from login_action t1
join user_add t2
    on t1.uid = t2.uid
where t1.login_date = 20220324         -- the day being analysed
  and t2.add_date = (20220324 - 1)     -- 1-day retention

union all -- merge the 1-day and 2-day retained users

select t1.uid, t1.login_date, t2.add_date
from login_action t1
join user_add t2
    on t1.uid = t2.uid
where t1.login_date = 20220324         -- the day being analysed
  and t2.add_date = (20220324 - 2);    -- 2-day retention

_u1.uid	_u1.login_date	_u1.add_date
c	20220324	20220323
b	20220324	20220322
a	20220324	20220322

-- Churned users as of 20220325: users whose latest login is more than two
-- days before that date.
-- NOTE: dates are yyyyMMdd integers, so "20220325 - 2" is plain integer
-- subtraction; it only yields a valid date while the result stays inside
-- the same month.
-- The statement is now terminated with ';' like the other statements in this file.
select
    uid,
    max(login_date) as last_login
from login_action
where login_date <= 20220325 -- ignore anything after the day being analysed
group by uid
having max(login_date) < (20220325 - 2); -- last login more than two days ago: churned

uid	last_login
d	20220322
e	20220321

-- Silent users: users with exactly one login row in login_action.
-- The statement is now terminated with ';' like the other statements in this file.
select
    uid,
    max(login_date) as once_login -- the only login date of that user
from login_action
group by uid
having count(login_date) = 1;

uid	once_login
e	20220321

-- Returning users on 20220325: users active that day whose previous login
-- was more than two days earlier (they had churned before coming back).
-- The two derived tables must be connected with "join ("; without it the
-- statement is not valid SQL.
-- NOTE: dates are yyyyMMdd integers, so "20220325 - 2" is plain integer
-- subtraction; it only yields a valid date while the result stays inside
-- the same month.
select t1.uid, t1.login_date, t2.last_login
from (
    select uid, login_date
    from login_action
    where login_date = 20220325 -- users active on the day being analysed
) t1
join (
    select uid, max(login_date) as last_login
    from login_action
    where login_date < 20220325 -- only logins before the day being analysed
    group by uid
    having max(login_date) < (20220325 - 2) -- had churned before that day
) t2
on t1.uid = t2.uid;

t1.uid	t1.login_date	t2.last_login
f	20220325	20220321






