第一题
现有数据score_info.txt
文件,文件内容如下所示:
1 | 1001 01 90 |
编写HiveQL
代码,完成以下问题:
创建
score_info
表,字段名分别为uid
、subject_id
和score
。1
2
3
4
5
6
7
8create table if not exists score_info(
uid string,
subject_id string,
score int
)
row format delimited
fields terminated by '\t'
lines terminated by '\n';将数据导入
score_info
表。1
2
3load data local
inpath '/opt/module/hive-3.1.2/datas/score_info.txt'
overwrite into table score_info;求出每门学科的平均成绩。
1
2
3
4
5
6select uid,
score,
subject_id,
avg(score) over (partition by subject_id) avg_score
from
score_info找出所有科目成绩都大于某个学科平均成绩的学生。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18select
t2.uid
from
(
select
t1.uid,
if(score > avg_score, 0, 1) flag
from(
select uid,
score,
subject_id,
avg(score) over (partition by subject_id) avg_score
from
score_info
) t1
) t2
group by uid
having sum(t2.flag) = 0;
第二题
现有数据action.txt
文件,文件内容如下所示:
1 | u01 2017/1/21 5 |
编写HiveQL
代码,完成以下问题:
创建
action
表,字段名分别为userId
,visitDate
和visitCount
。1
2
3
4
5
6
7
8create table if not exists action(
userid string,
visitDate string,
visitCount int
)
row format delimited
fields terminated by '\t'
lines terminated by '\n';将数据导入
action
表。1
2
3load data local
inpath '/opt/module/hive-3.1.2/datas/action.txt'
into table action;统计出每个用户的月累计访问次数。
1
2
3
4
5select
userid,
substring(visitDate, 0, 6) visitMonth,
sum(visitCount) over (partition by userid, month(visitDate)) sumCount
from action;统计出每个用户的逐月累计访问次数。
1
2
3
4
5
6
7
8
9
10
11select
userid,
visitMonth,
max(sumCount)
from(
select
uid,
substring(visitDate, 0, 6) visitMonth,
sum(visitCount) over (partition by userid, month(visitDate) order by visitDate) sumCount
from action) t1
group by userid, visitMonth;