Hive实战—五个面试题

By timebusker on February 22, 2018

网站访问统计分析

# 用户名,月份,访问次数,url
A,2015-01,5,url2
A,2015-01,15,url123
B,2015-01,5,url13
A,2015-01,8,url22
B,2015-01,25,url23
A,2015-01,5,url11
A,2015-02,4,url12
A,2015-02,6,url23
B,2015-02,10,url23
B,2015-02,5,url122
A,2015-03,16,url33
A,2015-03,22,url123
B,2015-03,23,url12
B,2015-03,10,url13
B,2015-03,1,url11

create table tb_access(user string,date string,times bigint,url string) row format delimited fields terminated by ",";
load data local inpath '/root/data/tb_access' overwrite into table tb_access;
select * from tb_access;
  • 每月为止的最大单月访问次数、累计到该月的总访问次数、当月访问次数
select 
user,date,times,url,
max(times) over(partition by user order by date) as max_times,
sum(times) over(partition by user order by date) as sum_times,
sum(times) over(partition by user,date) as date_times
from tb_access;

学生课程成绩

// 字段解释:id, 学号, 课程, 成绩
CREATE TABLE `course` (
  `id` int,
  `sid` int ,
  `course` string,
  `score` int 
) ;

INSERT INTO `course` VALUES (1, 1, 'yuwen', 43);
INSERT INTO `course` VALUES (2, 1, 'shuxue', 55);
INSERT INTO `course` VALUES (3, 2, 'yuwen', 77);
INSERT INTO `course` VALUES (4, 2, 'shuxue', 88);
INSERT INTO `course` VALUES (5, 3, 'yuwen', 98);
INSERT INTO `course` VALUES (6, 3, 'shuxue', 65);
  • 所有数学课程成绩大于语文课程成绩的学生的学号
select id,sid,course,score,
max(score) over(partition by sid,course order by score) max_course
from course