快好知 kuaihz

使用SAP HANA Web-based Development工具进...

通过csv文件提供的数据库表内容:

links.csv的格式:

movies.csv格式,一个movie可以有多种风格(genres),通过|分隔:

ratings.csv:

用户给movie打的分:

tags.csv:movie的标签

练习1:

列出四张表的总记录数:

-- Row count of each of the four MovieLens tables, one result row per table.
-- The table labels are string literals, so they take single quotes; in HANA
-- a double-quoted token is an identifier (column name), not a string.
SELECT 'links' AS "table name", COUNT(1) AS "row count"
FROM "MOVIELENS"."public.aa.movielens.hdb::data.LINKS"
UNION ALL
SELECT 'movies' AS "table name", COUNT(1) AS "row count"
FROM "MOVIELENS"."public.aa.movielens.hdb::data.MOVIES"
UNION ALL
SELECT 'ratings' AS "table name", COUNT(1) AS "row count"
FROM "MOVIELENS"."public.aa.movielens.hdb::data.RATINGS"
UNION ALL
SELECT 'tags' AS "table name", COUNT(1) AS "row count"
FROM "MOVIELENS"."public.aa.movielens.hdb::data.TAGS";

执行结果:

练习2:计算总共9125部电影,一共包含多少艺术类别?

-- Lists the distinct genres found in the pipe-separated "GENRES" column of
-- MOVIES. Each distinct "GENRES" value is split on '|' into an array, the
-- array is unnested into a table variable, and the values are de-duplicated.
DO
BEGIN
  DECLARE genreArray NVARCHAR(255) ARRAY;
  DECLARE tmp NVARCHAR(255);
  DECLARE idx INTEGER;
  -- String literals use single quotes; double quotes denote identifiers.
  DECLARE sep NVARCHAR(1) := '|';
  DECLARE CURSOR cur FOR
    SELECT DISTINCT "GENRES"
    FROM "MOVIELENS"."public.aa.movielens.hdb::data.MOVIES";
  DECLARE genres NVARCHAR(255) := '';

  idx := 1;
  FOR cur_row AS cur() DO
    SELECT cur_row."GENRES" INTO genres FROM DUMMY;
    tmp := :genres;
    -- Peel off one genre per iteration while a separator remains.
    WHILE LOCATE(:tmp, :sep) > 0 DO
      genreArray[:idx] := SUBSTR_BEFORE(:tmp, :sep);
      tmp := SUBSTR_AFTER(:tmp, :sep);
      idx := :idx + 1;
    END WHILE;
    -- Whatever is left is the last (or only) genre of this row.
    genreArray[:idx] := :tmp;
    -- Advance past the slot just written; otherwise the first genre of the
    -- next row would overwrite this row's last genre.
    idx := :idx + 1;
  END FOR;

  genreList = UNNEST(:genreArray) AS ("GENRE");
  SELECT "GENRE" FROM :genreList GROUP BY "GENRE";
END;

执行结果,总共包含18种:

练习3:计算每种艺术类别总共包含多少部电影:

-- Counts how many movies carry each genre. Every movie's pipe-separated
-- "GENRES" value is split on '|' into an array, the array is unnested into a
-- table variable, and the rows are counted per genre.
DO
BEGIN
  DECLARE genreArray NVARCHAR(255) ARRAY;
  DECLARE tmp NVARCHAR(255);
  DECLARE idx INTEGER;
  -- String literals use single quotes; double quotes denote identifiers.
  DECLARE sep NVARCHAR(1) := '|';
  -- No DISTINCT here: each movie must contribute its own genres, otherwise
  -- the result counts distinct genre combinations instead of movies.
  DECLARE CURSOR cur FOR
    SELECT "GENRES"
    FROM "MOVIELENS"."public.aa.movielens.hdb::data.MOVIES";
  DECLARE genres NVARCHAR(255) := '';

  idx := 1;
  FOR cur_row AS cur() DO
    SELECT cur_row."GENRES" INTO genres FROM DUMMY;
    tmp := :genres;
    -- Peel off one genre per iteration while a separator remains.
    WHILE LOCATE(:tmp, :sep) > 0 DO
      genreArray[:idx] := SUBSTR_BEFORE(:tmp, :sep);
      tmp := SUBSTR_AFTER(:tmp, :sep);
      idx := :idx + 1;
    END WHILE;
    -- Whatever is left is the last (or only) genre of this movie.
    genreArray[:idx] := :tmp;
    -- Advance past the slot just written; otherwise the first genre of the
    -- next movie would overwrite this movie's last genre and skew the counts.
    idx := :idx + 1;
  END FOR;

  genreList = UNNEST(:genreArray) AS ("GENRE");
  SELECT "GENRE", COUNT(1) AS "MOVIE_COUNT"
  FROM :genreList
  GROUP BY "GENRE";
END;

练习4:列出每部电影包含的风格数目:

-- Number of genres per movie, computed as the number of '|' separators in
-- "GENRES" plus one. The regex pattern is a string literal and therefore
-- single-quoted; [|] matches a literal pipe character.
SELECT
    "MOVIEID",
    "TITLE",
    OCCURRENCES_REGEXPR('[|]' IN "GENRES") + 1 AS "GENRE_COUNT",
    "GENRES"
FROM "MOVIELENS"."public.aa.movielens.hdb::data.MOVIES"
ORDER BY "GENRE_COUNT" ASC;

练习5:按每部电影包含的风格数目,统计电影的分布情况

-- Distribution of movies by how many genres they have: one row per genre
-- count with the number of movies having exactly that many genres.
-- The genre count is the number of '|' separators in "GENRES" plus one; the
-- regex pattern is a string literal and therefore single-quoted.
SELECT
    "GENRE_COUNT",
    COUNT(1) AS "MOVIE_COUNT"
FROM (
    SELECT
        OCCURRENCES_REGEXPR('[|]' IN "GENRES") + 1 AS "GENRE_COUNT"
    FROM "MOVIELENS"."public.aa.movielens.hdb::data.MOVIES"
)
GROUP BY "GENRE_COUNT"
ORDER BY "GENRE_COUNT";

比如只有1个风格的电影有2793部,有2个风格的电影有3039部,等等。

练习6:计算movie的rating分布情况

-- Summary statistics (min, max, avg, sum, median, stddev) over the number of
-- ratings per movie, returned as a single row.
-- ratings_per_movie yields one row per movie with its rating count.
-- NOTE(review): the outer GROUP BY collapses those rows to one per distinct
-- rating count before the window aggregates run, so the statistics are taken
-- over the distinct count values and "CATEGORY_COUNT" is the number of
-- distinct counts. Confirm this is intended rather than per-movie statistics.
WITH ratings_per_movie AS (
    SELECT
        "MOVIEID",
        COUNT(*) AS "RATING_COUNT"
    FROM "MOVIELENS"."public.aa.movielens.hdb::data.RATINGS"
    GROUP BY "MOVIEID"
)
SELECT DISTINCT
    MIN("RATING_COUNT")    OVER () AS "MIN",
    MAX("RATING_COUNT")    OVER () AS "MAX",
    AVG("RATING_COUNT")    OVER () AS "AVG",
    SUM("RATING_COUNT")    OVER () AS "SUM",
    MEDIAN("RATING_COUNT") OVER () AS "MEDIAN",
    STDDEV("RATING_COUNT") OVER () AS "STDDEV",
    COUNT(*)               OVER () AS "CATEGORY_COUNT"
FROM ratings_per_movie
GROUP BY "RATING_COUNT";

明细情况:

-- Detail view: for each rating count, how many movies received exactly that
-- many ratings, listed from the smallest rating count upwards.
WITH ratings_per_movie AS (
    SELECT
        "MOVIEID",
        COUNT(*) AS "RATING_COUNT"
    FROM "MOVIELENS"."public.aa.movielens.hdb::data.RATINGS"
    GROUP BY "MOVIEID"
)
SELECT
    "RATING_COUNT",
    COUNT(*) AS "MOVIE_COUNT"
FROM ratings_per_movie
GROUP BY "RATING_COUNT"
ORDER BY "RATING_COUNT";

比如有397部电影的用户投票数为5票

练习7:统计用户投票情况

-- For each rating count, how many users submitted exactly that many ratings,
-- listed from the most active users downwards.
WITH ratings_per_user AS (
    SELECT
        "USERID",
        COUNT(*) AS "RATING_COUNT"
    FROM "MOVIELENS"."public.aa.movielens.hdb::data.RATINGS"
    GROUP BY "USERID"
)
SELECT
    "RATING_COUNT",
    COUNT(*) AS "USER_COUNT"
FROM ratings_per_user
GROUP BY "RATING_COUNT"
ORDER BY "RATING_COUNT" DESC;

有一位用户投了2391票,一位用户投了1868票:

练习8:统计用户投票得分情况

-- Number of ratings given for each score value, highest score first.
SELECT
    "RATING",
    COUNT(*) AS "RATING_COUNT"
FROM "MOVIELENS"."public.aa.movielens.hdb::data.RATINGS"
GROUP BY "RATING"
ORDER BY "RATING" DESC;

有15095份用户投票,打的分数是5分

要获取更多Jerry的原创文章,请关注公众号"汪子熙":

本站资源来自互联网,仅供学习,如有侵权,请通知删除,敬请谅解!
搜索建议:Development  Development词条  Web-based  Web-based词条  工具  工具词条  使用  使用词条  HANA  HANA词条