2023年6月23日发(作者:)
hive中两数组类型的数据进⾏整合(array[])1、 –dws层经过处理后的数据1234567select _id as user_id , _book_id as arrays
--arrays字段在dws_qy_profile_reads_stat ,中该字段为 array[]类型的数据
from
dws_reads table1 ;2、 –dwd_qy_profile_cpsshardn_user_recently_read表提供增量数据数据来源 –统计每天的数据按照user_id字段进⾏聚合收集book_id字段, –收集后的数据集放在arrays字段中111213select _id as user_id, collect_set(_id) as arraysfrom
dwd_recently_read table2where
year = '2022' --${year}and month = '03' --${month}and day = '02' --${day}group by _id ;3、 –统计⽤户阅读过的书籍 –让历史数据跟增量数据进⾏整合 –先将两个数组字段中的集合值进⾏炸开(与视图配合使⽤)1718192627select _id as user_id,adidfrom (select _id as user_id , _book_id as arraysfrom
dws_reads table1
union allselect _id as user_id, collect_set(_id) as arraysfrom
dwd_recently_read table2where
year = '2022' --${year}and month = '03' --${month}and day = '02' --${day}group by _id
) temp LATERAL VIEW explode() adTable AS adid
--adTable 视图别名 ,adid 新列名4、 –然后在按照⽤户进⾏聚合8293637select _id, --, collect_set()from
(
select _id as user_id, adidfrom (select _id as user_id , _book_id as arraysfrom
dws_reads table1
union allselect _id as user_id, collect_set(_id) as arraysfrom
dwd_recently_read table2where
year = '2022' --${year}and month = '03' --${month}and day = '02' --${day}group by _id
) temp LATERAL VIEW explode() adTable AS adid
--adTable 视图别名 ,adid 新列名)tt
group by user_id; --24条数据
发布者:admin,转转请注明出处:http://www.yc00.com/news/1687517220a16328.html
评论列表(0条)