SQL Interview Questions and Solutions for Data Development
This article presents a collection of intermediate to advanced SQL interview problems covering grouping, window functions, sessionization, ranking, and join techniques, each accompanied by detailed solution queries and explanations to help data engineers prepare for technical interviews.
1. Overview
The author, a big‑data specialist, shares a set of SQL interview questions frequently encountered by data engineers, product managers, and developers, focusing on practical query techniques such as GROUP BY, window functions, self‑joins, and session generation.
2. Solving Approach
Questions are categorized by difficulty (simple, medium, hard) and each solution demonstrates the core SQL concepts required, often using Common Table Expressions (CTE), window functions, and conditional logic.
3. SQL Real‑World Questions
Question 1 – Categorize goods into top‑10, top‑10~20, and others, then count distinct browsing users per group.
create table if not exists test.nil_goods_category as
select goods_id,
case when nn<= 10 then 'top10'
when nn<= 20 then 'top10~top20'
else 'other' end as goods_group
from (
select goods_id,
row_number() over(partition by goods_id order by sale_sum desc) as nn
from (
select goods_id,sum(amount) as sale_sum
from order
group by 1
) aa
) bb;
select b.goods_group, count(distinct a.uid) as num
from pv a
left join test.nil_goods_category b
on a.goods_id = b.goods_id
group by 1;Question 2 – Count distinct goods that have an activity within a given time window.
-- Variant 1
select count(distinct g_id) as event_goods_num
from goods_event
where (t1<=t4 and t1>=t3)
or (t2>=t3 and t2<=t4);
-- Variant 2
select count(distinct g_id) as event_goods_num
from goods_event
where (t1<=t4 and t1>=t3)
union all
select count(distinct g_id) as event_goods_num
from goods_event
where (t2>=t3 and t2<=t4);Question 3 – Find the most frequently occurring goods and its latest activity time.
select a.goods_id, a.time
from event a
inner join (
select goods_id, count(*)
from event
group by goods_id
order by count(*) desc
limit 1
) b on a.goods_id = b.goods_id
order by a.time desc;Question 4 – Generate session identifiers for user login logs where a session is defined as activity within one hour.
drop table if exists koo.nil_temp0222_a2;
create table if not exists koo.nil_temp0222_a2 as
select *,
row_number() over(partition by userid order by inserttime) as nn1
from (
select a.*, b.inserttime as inserttime_aftr,
datediff(b.inserttime, a.inserttime) as session_diff
from (
select userid, inserttime,
row_number() over(partition by userid order by inserttime asc) nn
from koo.nil_temp0222
where userid = 1900000169
) a
left join (
select userid, inserttime,
row_number() over(partition by userid order by inserttime asc) nn
from koo.nil_temp0222
where userid = 1900000169
) b on a.userid = b.userid and a.nn = b.nn-1
) aa
where session_diff > 10 or nn = 1
order by userid, inserttime;
-- further steps to assign session_id
drop table if exists koo.nil_temp0222_a2_1;
create table if not exists koo.nil_temp0222_a2_1 as
select a.*, case when b.nn is null then a.nn+3 else b.nn end as nn_end
from koo.nil_temp0222_a2 a
left join koo.nil_temp0222_a2 b
on a.userid = b.userid and a.nn1 = b.nn1 - 1;
select a.*, b.nn1 as session_id
from (
select userid, inserttime,
row_number() over(partition by userid order by inserttime asc) nn
from koo.nil_temp0222
where userid = 1900000169
) a
left join koo.nil_temp0222_a2_1 b
on a.userid = b.userid and a.nn >= b.nn and a.nn < b.nn_end;Question 5 – Retrieve the last three orders of each month’s final day.
select *
from (
select *,
rank() over(partition by mm order by dd desc) as nn1,
row_number() over(partition by mm, dd order by inserttime desc) as nn2
from (
select cast(right(to_date(inserttime),2) as int) as dd,
month(inserttime) as mm,
userid, inserttime
from koo.nil_temp0222
) aa
) bb
where nn1 = 1 and nn2 <= 3;Question 6 – Find dates where three consecutive days each have visits greater than 100.
select a.*, b.num as num2, c.num as num3
from table a
left join table b on a.userid = b.userid and a.dt = date_add(b.dt, -1)
left join table c on a.userid = c.userid and a.dt = date_add(c.dt, -2)
where b.num > 100 and a.num > 100 and c.num > 100;Question 7 – Identify rows in table A whose 20 feature columns match those in table B, allowing at most one mismatch.
-- Exact match
select aa.*
from (
select *, concat(d1,d2,d3,…,d20) as mmd
from table_a
) aa
left join (
select id, concat(d1,d2,d3,…,d20) as mmd
from table_b
) bb on aa.id = bb.id and aa.mmd = bb.mmd;
-- One‑column mismatch allowed
select a.*, sum(d1_jp, d2_jp, …, d20_jp) as same_judge
from (
select a.*,
case when a.d1 = b.d1 then 1 else 0 end as d1_jp,
case when a.d2 = b.d2 then 1 else 0 end as d2_jp,
/* … repeat for d3 … d20 … */
from table_a a
left join table_b b on a.id = b.id
) aa
where sum(d1_jp, d2_jp, …, d20_jp) = 19;Question 8 – Compute the inner product of user rating vectors stored as (uid, goods_id, star).
select aa.uid1, aa.uid2, sum(star_multi) as result
from (
select a.uid as uid1,
b.uid as uid2,
a.goods_id,
a.star * b.star as star_multi
from t a
left join t b on a.goods_id = b.goods_id and a.uid <> b.uid
) aa
group by uid1, uid2;Question 9 – Calculate the median from a table of numbers with frequencies.
select a.*, b.s_mid_n, c.l_mid_n, avg(b.s_mid_n, c.l_mid_n)
from (
select case when mod(count(*),2)=0 then count(*)/2 else (count(*)+1)/2 end as s_mid,
case when mod(count(*),2)=0 then count(*)/2+1 else (count(*)+1)/2 end as l_mid
from table
) a
left join (
select id, num, row_number() over(partition by id order by num asc) nn
from table
) b on a.s_mid = b.nn
left join (
select id, num, row_number() over(partition by id order by num asc) nn
from table
) c on a.l_mid = c.nn;Question 10 – Find stores that have sales in every week of a given month.
select distinct credit_level
from (
select credit_level, count(distinct nn) as number
from (
select userid, credit_level, inserttime,
month(inserttime) as mm,
weekofyear(inserttime) as week,
dense_rank() over(partition by credit_level, month(inserttime) order by weekofyear(inserttime) asc) as nn
from koo.nil_temp0222
where substring(inserttime,1,7) = '2019-12'
) aa
group by credit_level
) bb
where number = (select count(distinct weekofyear(inserttime))
from koo.nil_temp0222
where substring(inserttime,1,7) = '2019-12');Signed-in readers can open the original source through BestHub's protected redirect.
This article has been distilled and summarized from source material, then republished for learning and reference. If you believe it infringes your rights, please contactand we will review it promptly.
Big Data Technology & Architecture
Wang Zhiwu, a big data expert, dedicated to sharing big data technology.
How this landed with the community
Was this worth your time?
0 Comments
Thoughtful readers leave field notes, pushback, and hard-won operational detail here.
