FoodMart数据仓库mysql数据初始化

FoodMart简介

FoodMart是一个小型的数据仓库的示例,它基于食品超市的场景。

Mondrian 是一个JAVA写的OLAP引擎.,它从JDBC里读取聚合的数据并缓存在内存里,同时支持MDX查询和提供olap4j、 XML/A 相关API。

:其它数据库如SQL Server、posgres、Oracle类似(即修改JDBC的URL并配置相应的JDBC驱动)。

下载mondrian-3.3.0


Mondrian download | SourceForge.net

2 点击File 选项后点击mondrian链接进入。

3 找到mondrian-3.3.0.14703后点击下载。

注:也可以选择3.2,当前使用的是3.3。

找到WAR包并解压 

在下载的mondrian-3.3.0.14703.zip里找到mondrian.war。

FoodMart数据仓库mysql表及数据初始化及重度汇总脚本_database

找到需要的JAR包拷贝出来

用解压工具提取处WAR包里的7个jar包并拷贝到指定目录,当前是D:\FoodMart

olap4j.jar

mondrian.jar

log4j-1.2.8.jar

commons-logging-1.0.4.jar

eigenbase-xom.jar

eigenbase-resgen.jar

eigenbase-properties.jar

FoodMart数据仓库mysql表及数据初始化及重度汇总脚本_jar_02

下载mysql驱动

当前下载的是mysql-connector-java-5.1.19.jar,并放在D:\FoodMart目录内。

拷贝数据文件对应SQL

将之前下载的mondrian-3.3.0.14703.zip里的FoodMartCreateData.sql拷贝到D:\FoodMart目录内。

FoodMart数据仓库mysql表及数据初始化及重度汇总脚本_jar_03

Mysql里新建库

CREATE DATABASE IF NOT EXISTS foodmartDEFAULT CHARSET utf8;

Windows命令里执行数据加载程序

打开windows命令行,执行如下程序

java -cp D:\FoodMart\mondrian.jar;D:\FoodMart\log4j-1.2.8.jar;D:\FoodMart\commons-logging-1.0.4.jar;D:\FoodMart\eigenbase-xom.jar;D:\FoodMart\eigenbase-resgen.jar;D:\FoodMart\eigenbase-properties.jar;D:\FoodMart\mysql-connector-java-5.1.20-bin.jar;D:\FoodMart\olap4j.jar;D:\FoodMart\mysql-connector-java-5.1.19.jar mondrian.test.loader.MondrianFoodMartLoader -verbose -tables -data -indexes -jdbcDrivers="com.mysql.jdbc.Driver" -inputFile=D:\FoodMart\FoodMartCreateData.sql -outputJdbcURL="jdbc:mysql://localhost:3309/foodmart?user=root&password=root1234

注:数据库名、用户名、密码、端口按需修改。  

FoodMart数据仓库mysql表及数据初始化及重度汇总脚本_jar_04

数据库里查看

-- 查看sales_fact_1998数据
SELECT * FROM sales_fact_1998 limit 10

FoodMart数据仓库mysql表及数据初始化及重度汇总脚本_bc_05

-- 查看表数量及数据大小
SELECT COUNT(TABLE_NAME) tabcnt,
(sum(DATA_LENGTH)+sum(INDEX_LENGTH))/1024.0/1024 dbsize_M
from information_schema.tables
where table_schema='foodmart';

 

FoodMart数据仓库mysql表及数据初始化及重度汇总脚本_mysql_06

重度汇总脚本

# Create aggregate tables for Mondrian

##################################################################
## agg_pl_01_sales_fact_1997 done
##################################################################
# physical
# lost "promotion_id" "store_id"

/*
7 fact tables: sales_fact_1997, sales_fact_1998, sales_fact_dec_1998, inventory_fact_1997, inventory_fact_1998, salary, expense_fact
19 dimension tables: product, customer, time_by_day, employee and more
11 aggregate tables
*/

INSERT INTO "agg_pl_01_sales_fact_1997" (
    "product_id",
    "time_id",
    "customer_id",
    "store_sales_sum",
    "store_cost_sum",
    "unit_sales_sum",
    "fact_count"
) SELECT
    "product_id" AS "product_id",
    "time_id" AS "time_id",
    "customer_id" AS "customer_id",
    SUM("store_sales") AS "store_sales",
    SUM("store_cost") AS "store_cost",
    SUM("unit_sales") AS "unit_sales",
    COUNT(*) AS fact_count
FROM "sales_fact_1997"
GROUP BY "product_id", "time_id", "customer_id";


INSERT INTO "agg_ll_01_sales_fact_1997" (
    "product_id",
    "time_id",
    "customer_id",
    "store_sales",
    "store_cost",
    "unit_sales",
    "fact_count"
) SELECT
    "product_id" AS "product_id",
    "time_id" AS "time_id",
    "customer_id" AS "customer_id",
    SUM("store_sales") AS "store_sales",
    SUM("store_cost") AS "store_cost",
    SUM("unit_sales") AS "unit_sales",
    COUNT(*) AS "fact_count"
FROM "sales_fact_1997"
GROUP BY "product_id", "time_id", "customer_id";

##################################################################
## agg_l_03_sales_fact_1997 done
##################################################################
# logical
# lost "product_id" "promotion_id" "store_id"


INSERT INTO "agg_l_03_sales_fact_1997" (
    "customer_id",
    "time_id",
    "store_sales",
    "store_cost",
    "unit_sales",
    "fact_count"
) SELECT
    "customer_id",
    "time_id",
    SUM("store_sales") AS "store_sales",
    SUM("store_cost") AS "store_cost",
    SUM("unit_sales") AS "unit_sales",
    COUNT(*) AS "fact_count"
FROM "sales_fact_1997"
GROUP BY "customer_id", "time_id";

##################################################################
## agg_lc_06_sales_fact_1997 done
##################################################################
# collapse "customer_id"
# lost "product_id" "promotion_id" "store_id"


INSERT INTO "agg_lc_06_sales_fact_1997" (
    "time_id",
    "city",
    "state_province",
    "country",
    "store_sales",
    "store_cost",
    "unit_sales",
    "fact_count"
) SELECT
    "B"."time_id",
    "D"."city",
    "D"."state_province",
    "D"."country",
    SUM("B"."store_sales") AS "store_sales",
    SUM("B"."store_cost") AS "store_cost",
    SUM("B"."unit_sales") AS "unit_sales",
    COUNT(*) AS fact_count
FROM "sales_fact_1997" "B", "customer" "D"
WHERE
    "B"."customer_id" = "D"."customer_id"
GROUP BY
         "B"."time_id",
         "D"."city",
         "D"."state_province",
         "D"."country";

##################################################################
## agg_l_04_sales_fact_1997 done
##################################################################
# logical
# lost "customer_id" "product_id" "promotion_id" "store_id"

INSERT INTO "agg_l_04_sales_fact_1997" (
    "time_id",
    "store_sales",
    "store_cost",
    "unit_sales",
    "customer_count",
    "fact_count"
) SELECT
    "time_id",
    SUM("store_sales") AS "store_sales",
    SUM("store_cost") AS "store_cost",
    SUM("unit_sales") AS "unit_sales",
    COUNT(DISTINCT "customer_id") AS "customer_count",
    COUNT(*) AS "fact_count"
FROM "sales_fact_1997"
GROUP BY "time_id";

##################################################################
## agg_c_10_sales_fact_1997 done
##################################################################
# collapse "time_id"
# lost "customer_id" "product_id" "promotion_id" "store_id"


INSERT INTO "agg_c_10_sales_fact_1997" (
    "month_of_year",
    "quarter",
    "the_year",
    "store_sales",
    "store_cost",
    "unit_sales",
    "customer_count",
    "fact_count"
) SELECT
    "D"."month_of_year",
    "D"."quarter",
    "D"."the_year",
    SUM("B"."store_sales") AS "store_sales",
    SUM("B"."store_cost") AS "store_cost",
    SUM("B"."unit_sales") AS "unit_sales",
    COUNT(DISTINCT "customer_id") AS "customer_count",
    COUNT(*) AS fact_count
FROM "sales_fact_1997" "B", "time_by_day" "D"
WHERE
    "B"."time_id" = "D"."time_id"
GROUP BY
         "D"."month_of_year",
         "D"."quarter",
         "D"."the_year";

##################################################################
## agg_l_05_sales_fact_1997 done
##################################################################
# logical
# lost "time_id"


INSERT INTO "agg_l_05_sales_fact_1997" (
    "product_id",
    "customer_id",
    "promotion_id",
    "store_id",
    "store_sales",
    "store_cost",
    "unit_sales",
    "fact_count"
) SELECT
    "product_id",
    "customer_id",
    "promotion_id",
    "store_id",
    SUM("store_sales") AS "store_sales",
    SUM("store_cost") AS "store_cost",
    SUM("unit_sales") AS "unit_sales",
    COUNT(*) AS fact_count
FROM "sales_fact_1997"
GROUP BY "product_id", "customer_id", "promotion_id", "store_id";



##################################################################
## agg_c_14_sales_fact_1997 done
##################################################################
# collapse "time_id"


INSERT INTO "agg_c_14_sales_fact_1997" (
    "product_id",
    "customer_id",
    "promotion_id",
    "store_id",
    "month_of_year",
    "quarter",
    "the_year",
    "store_sales",
    "store_cost",
    "unit_sales",
    "fact_count"
) SELECT
    "B"."product_id",
    "B"."customer_id",
    "B"."promotion_id",
    "B"."store_id",
    "D"."month_of_year",
    "D"."quarter",
    "D"."the_year",
    SUM("B"."store_sales") AS "store_sales",
    SUM("B"."store_cost") AS "store_cost",
    SUM("B"."unit_sales") AS "unit_sales",
    COUNT(*) AS fact_count
FROM "sales_fact_1997" "B", "time_by_day" "D"
WHERE
    "B"."time_id" = "D"."time_id"
GROUP BY "B"."product_id",
         "B"."customer_id",
         "B"."promotion_id",
         "B"."store_id",
         "D"."month_of_year",
         "D"."quarter",
         "D"."the_year";


##################################################################
## agg_lc_100_sales_fact_1997 done
##################################################################
# drop "promotion_id"
# drop "store_id"
# collapse "time_id"


INSERT INTO "agg_lc_100_sales_fact_1997" (
    "product_id",
    "customer_id",
    "quarter",
    "the_year",
    "store_sales",
    "store_cost",
    "unit_sales",
    "fact_count"
) SELECT
    "B"."product_id",
    "B"."customer_id",
    "D"."quarter",
    "D"."the_year",
    SUM("B"."store_sales") AS "store_sales",
    SUM("B"."store_cost") AS "store_cost",
    SUM("B"."unit_sales") AS "unit_sales",
    COUNT(*) AS fact_count
FROM "sales_fact_1997" "B", "time_by_day" "D"
WHERE
    "B"."time_id" = "D"."time_id"
GROUP BY "B"."product_id",
         "B"."customer_id",
         "D"."quarter",
         "D"."the_year";


##################################################################
##################################################################
## SPECIAL
##################################################################
##################################################################
## agg_c_special_sales_fact_1997 done
## based upon agg_c_14_sales_fact_1997
##################################################################
# collapse "time_id"


INSERT INTO "agg_c_special_sales_fact_1997" (
    "product_id",
    "customer_id",
    "promotion_id",
    "store_id",
    "time_month",
    "time_quarter",
    "time_year",
    "store_sales_sum",
    "store_cost_sum",
    "unit_sales_sum",
    "fact_count"
) SELECT
    "B"."product_id",
    "B"."customer_id",
    "B"."promotion_id",
    "B"."store_id",
    "D"."month_of_year",
    "D"."quarter",
    "D"."the_year",
    SUM("B"."store_sales") AS "store_sales_sum",
    SUM("B"."store_cost") AS "store_cost_sum",
    SUM("B"."unit_sales") AS "unit_sales_sum",
    COUNT(*) AS "fact_count"
FROM "sales_fact_1997" "B", "time_by_day" "D"
WHERE
    "B"."time_id" = "D"."time_id"
GROUP BY "B"."product_id",
         "B"."customer_id",
         "B"."promotion_id",
         "B"."store_id",
         "D"."month_of_year",
         "D"."quarter",
         "D"."the_year";

##################################################################
# agg_gender_ms_state_sales_fact_1997
##################################################################

INSERT INTO "agg_g_ms_pcat_sales_fact_1997" (
    "gender",
    "marital_status",
    "product_family",
    "product_department",
    "product_category",
    "month_of_year",
    "quarter",
    "the_year",
    "store_sales",
    "store_cost",
    "unit_sales",
    "customer_count",
    "fact_count"
) SELECT
    "C"."gender",
    "C"."marital_status",
    "PC"."product_family",
    "PC"."product_department",
    "PC"."product_category",
    "T"."month_of_year",
    "T"."quarter",
    "T"."the_year",
    SUM("B"."store_sales") AS "store_sales",
    SUM("B"."store_cost") AS "store_cost",
    SUM("B"."unit_sales") AS "unit_sales",
    COUNT(DISTINCT "C"."customer_id") AS "customer_count",
    COUNT(*) AS "fact_count"
FROM "sales_fact_1997" "B",
    "time_by_day" "T",
    "product" "P",
    "product_class" "PC",
    "customer" "C"
WHERE
    "B"."time_id" = "T"."time_id"
AND "B"."customer_id" = "C"."customer_id"
AND "B"."product_id" = "P"."product_id"
AND "P"."product_class_id" = "PC"."product_class_id"
GROUP BY
    "C"."gender",
    "C"."marital_status",
    "PC"."product_family",
    "PC"."product_department",
    "PC"."product_category",
    "T"."month_of_year",
    "T"."quarter",
    "T"."the_year";

/*
# Above query, rephrased for Access (which does not support
# COUNT(DISTINCT ...) explicitly.
#
#INSERT INTO "agg_g_ms_pcat_sales_fact_1997" (
#    "gender",
#    "marital_status",
#    "product_family",
#    "product_department",
#    "product_category",
#    "month_of_year",
#    "quarter",
#    "the_year",
#    "store_sales",
#    "store_cost",
#    "unit_sales",
#    "customer_count",
#    "fact_count"
#) SELECT
#    "C"."gender",
#    "C"."marital_status",
#    "PC"."product_family",
#    "PC"."product_department",
#    "PC"."product_category",
#    "T"."month_of_year",
#    "T"."quarter",
#    "T"."the_year",
#    SUM("B"."store_sales") AS "store_sales",
#    SUM("B"."store_cost") AS "store_cost",
#    SUM("B"."unit_sales") AS "unit_sales",
#    (
#    SELECT COUNT("customer_id")
#    FROM (
#        SELECT DISTINCT
#            "DC"."gender",
#            "DC"."marital_status",
#            "DPC"."product_family",
#            "DPC"."product_department",
#            "DPC"."product_category",
#            "DT"."month_of_year",
#            "DT"."quarter",
#            "DT"."the_year",
#            "DB"."customer_id"
#        FROM
#            "sales_fact_1997" "DB",
#            "time_by_day" "DT",
#            "product" "DP",
#            "product_class" "DPC",
#            "customer" "DC"
#        WHERE
#            "DB"."time_id" = "DT"."time_id"
#        AND "DB"."customer_id" = "DC"."customer_id"
#        AND "DB"."product_id" = "DP"."product_id"
#        AND "DP"."product_class_id" = "DPC"."product_class_id") AS "CDC"
#    WHERE "CDC"."gender" = "C"."gender"
#    AND "CDC"."marital_status" = "C"."marital_status"
#    AND "CDC"."product_family" = "PC"."product_family"
#    AND "CDC"."product_department" = "PC"."product_department"
#    AND "CDC"."product_category" = "PC"."product_category"
#    AND "CDC"."month_of_year" = "T"."month_of_year"
#    AND "CDC"."quarter" = "T"."quarter"
#    AND "CDC"."the_year" = "T"."the_year"
#    GROUP BY
#        "gender",
#        "marital_status",
#        "product_family",
#        "product_department",
#        "product_category",
#        "month_of_year",
#        "quarter",
#        "the_year") AS "customer_count",
#    COUNT(*) AS "fact_count"
#FROM "sales_fact_1997" "B",
#    "time_by_day" "T",
#    "product" "P",
#    "product_class" "PC",
#    "customer" "C"
#WHERE
#    "B"."time_id" = "T"."time_id"
#AND "B"."customer_id" = "C"."customer_id"
#AND "B"."product_id" = "P"."product_id"
#AND "P"."product_class_id" = "PC"."product_class_id"
#GROUP BY
#    "C"."gender",
#    "C"."marital_status",
#    "PC"."product_family",
#    "PC"."product_department",
#    "PC"."product_category",
#    "T"."month_of_year",
#    "T"."quarter",
#    "T"."the_year";

# End insert.sql
*/

彩蛋 

可访问已经整理好的数据库建表及数据脚本。

链接:https://pan.baidu.com/s/1d4CikASBHF6qTV9hst-qvQ 
提取码:yx8c