Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

TPCH test on planner v2 #6060

Closed
BohuTANG opened this issue Jun 19, 2022 · 21 comments
Closed

TPCH test on planner v2 #6060

BohuTANG opened this issue Jun 19, 2022 · 21 comments
Assignees
Labels
A-planner Area: planner/optimizer C-testing Category: testing

Comments

@BohuTANG
Copy link
Member

BohuTANG commented Jun 19, 2022

Summary

Result:

OK: Q1, Q3, Q4, Q5, Q6, Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15(Rewrite), Q16, Q19, Q20, Q21, Q22
OOM: Q2, Q7, Q17, Q19, Q21
Error: Q16, Q18, Q20

Test PC

  • 16C48G

Generating Test Data (1G)

docker pull ghcr.io/databloom-ai/tpch-docker:main
docker run -it -v "$(pwd)":/data ghcr.io/databloom-ai/tpch-docker:main dbgen -vf -s 1

Create Database and Table

CREATE DATABASE IF NOT EXISTS tpch;
USE tpch;
CREATE TABLE IF NOT EXISTS nation  ( N_NATIONKEY  INTEGER NOT NULL,
                            N_NAME       VARCHAR NOT NULL,
                            N_REGIONKEY  INT NOT NULL,
                            N_COMMENT    VARCHAR);

CREATE TABLE IF NOT EXISTS region  ( R_REGIONKEY  INT NOT NULL,
       	               R_NAME       VARCHAR NOT NULL,
                       R_COMMENT    VARCHAR);

CREATE TABLE IF NOT EXISTS part  ( P_PARTKEY     INT NOT NULL,
                          P_NAME        VARCHAR NOT NULL,
                          P_MFGR        VARCHAR NOT NULL,
                          P_BRAND       VARCHAR NOT NULL,
                          P_TYPE        VARCHAR NOT NULL,
                          P_SIZE        INT NOT NULL,
                          P_CONTAINER   VARCHAR NOT NULL,
                          P_RETAILPRICE FLOAT NOT NULL,
                          P_COMMENT     VARCHAR NOT NULL);

CREATE TABLE IF NOT EXISTS supplier  ( S_SUPPKEY     INT NOT NULL,
                             S_NAME        VARCHAR NOT NULL,
                             S_ADDRESS     VARCHAR NOT NULL,
                             S_NATIONKEY   INT NOT NULL,
                             S_PHONE       VARCHAR NOT NULL,
                             S_ACCTBAL     FLOAT NOT NULL,
                             S_COMMENT     VARCHAR NOT NULL);

CREATE TABLE IF NOT EXISTS partsupp ( PS_PARTKEY     INT NOT NULL,
                             PS_SUPPKEY     INT NOT NULL,
                             PS_AVAILQTY    INT NOT NULL,
                             PS_SUPPLYCOST  FLOAT NOT NULL,
                             PS_COMMENT     VARCHAR NOT NULL);

CREATE TABLE IF NOT EXISTS customer  ( C_CUSTKEY     INT NOT NULL,
                             C_NAME        VARCHAR NOT NULL,
                             C_ADDRESS     VARCHAR NOT NULL,
                             C_NATIONKEY   INT NOT NULL,
                             C_PHONE       VARCHAR NOT NULL,
                             C_ACCTBAL     FLOAT   NOT NULL,
                             C_MKTSEGMENT  VARCHAR NOT NULL,
                             C_COMMENT     VARCHAR NOT NULL);

CREATE TABLE IF NOT EXISTS orders  ( O_ORDERKEY       INT NOT NULL,
                           O_CUSTKEY        INT NOT NULL,
                           O_ORDERSTATUS    VARCHAR NOT NULL,
                           O_TOTALPRICE     FLOAT NOT NULL,
                           O_ORDERDATE      DATE NOT NULL,
                           O_ORDERPRIORITY  VARCHAR NOT NULL,  
                           O_CLERK          VARCHAR NOT NULL, 
                           O_SHIPPRIORITY   INT NOT NULL,
                           O_COMMENT        VARCHAR NOT NULL);

CREATE TABLE IF NOT EXISTS lineitem ( L_ORDERKEY    INT NOT NULL,
                             L_PARTKEY     INT NOT NULL,
                             L_SUPPKEY     INT NOT NULL,
                             L_LINENUMBER  INT NOT NULL,
                             L_QUANTITY    FLOAT NOT NULL,
                             L_EXTENDEDPRICE  FLOAT NOT NULL,
                             L_DISCOUNT    FLOAT NOT NULL,
                             L_TAX         FLOAT NOT NULL,
                             L_RETURNFLAG  VARCHAR NOT NULL,
                             L_LINESTATUS  VARCHAR NOT NULL,
                             L_SHIPDATE    DATE NOT NULL,
                             L_COMMITDATE  DATE NOT NULL,
                             L_RECEIPTDATE DATE NOT NULL,
                             L_SHIPINSTRUCT VARCHAR NOT NULL,
                             L_SHIPMODE     VARCHAR NOT NULL,
                             L_COMMENT      VARCHAR NOT NULL);

Load TPCH data to databend

#!/bin/bash

for t in customer lineitem nation orders partsupp part region supplier
do
    echo "$t"
    curl -XPUT 'http://root:@127.0.0.1:8000/v1/streaming_load' -H 'insert_sql: insert into tpch.'$t' format CSV' -H 'skip_header: 0' -H 'field_delimiter:|' -H 'record_delimiter: \n' -F 'upload=@"./'$t'.tbl"'
done

Queries

Q1(OK)

select
   l_returnflag,
   l_linestatus,
   sum(l_quantity) as sum_qty,
   sum(l_extendedprice) as sum_base_price,
   sum(l_extendedprice * (1 - l_discount)) as sum_disc_price,
   sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge,
   avg(l_quantity) as avg_qty,
   avg(l_extendedprice) as avg_price,
   avg(l_discount) as avg_disc,
   count(*) as count_order 
from
   lineitem 
where
   l_shipdate < '1998-12-01' 
group by
   l_returnflag,
   l_linestatus 
order by
   l_returnflag,
   l_linestatus LIMIT 1;

Q2(OOM)

select
        s_acctbal,
        s_name,
        n_name,
        p_partkey,
        p_mfgr,
        s_address,
        s_phone,
        s_comment
from
        part,
        supplier,
        partsupp,
        nation,
        region
-- so many criteria...
-- In EUROPE, the one with min 'ps_supplycost' and p_size, p_type...
where
        p_partkey = ps_partkey
        and s_suppkey = ps_suppkey
        and p_size = 15
        and p_type like '%BRASS'
        and s_nationkey = n_nationkey
        and n_regionkey = r_regionkey
        and r_name = 'EUROPE'
        and ps_supplycost = (
                select
                        min(ps_supplycost)
                from
                        partsupp,
                        supplier,
                        nation,
                        region
                -- to make sure the corresponding region of the record is 'EUROPE' Q5
                where
                        p_partkey = ps_partkey
                        and s_suppkey = ps_suppkey
                        and s_nationkey = n_nationkey
                        and n_regionkey = r_regionkey
                        and r_name = 'EUROPE'
        )
-- Sort using 4 factors, the first 's_acctbal' in decreasing order.
order by
        s_acctbal desc,
        n_name,
        s_name,
        p_partkey
-- Only shows the top 100 records
limit 100

Q3(OK)

select
   l_orderkey,
   sum(l_extendedprice * (1 - l_discount)) as revenue,
   o_orderdate,
   o_shippriority 
from
   customer,
   orders,
   lineitem 
where
   c_mktsegment = 'AUTOMOBILE' 
   and c_custkey = o_custkey 
   and l_orderkey = o_orderkey 
   and o_orderdate < '1995-03-13' 
   and l_shipdate > '1995-03-13' 
group by
   l_orderkey,
   o_orderdate,
   o_shippriority 
order by
   revenue desc,
   o_orderdate limit 10;

Q4(OK)

select
   o_orderpriority,
   count(*) as order_count 
from
   orders 
where
   o_orderdate >= '1995-01-01' 
   and o_orderdate < date_add(to_date('1995-01-01'), 3, month) 
   and exists 
   (
      select
         * 
      from
         lineitem 
      where
         l_orderkey = o_orderkey 
         and l_commitdate < l_receiptdate
   )
group by
   o_orderpriority 
order by
   o_orderpriority;

Q5(OK)

select
   n_name,
   sum(l_extendedprice * (1 - l_discount)) as revenue 
from
   customer,
   orders,
   lineitem,
   supplier,
   nation,
   region 
where
   c_custkey = o_custkey 
   and l_orderkey = o_orderkey 
   and l_suppkey = s_suppkey 
   and c_nationkey = s_nationkey 
   and s_nationkey = n_nationkey 
   and n_regionkey = r_regionkey 
   and r_name = 'MIDDLE EAST' 
   and o_orderdate >= '1994-01-01' 
   and o_orderdate < date_add(to_date('1994-01-01'), 1, year) 
group by
   n_name 
order by
   revenue desc;

Q6(OK)

select
   sum(l_extendedprice * l_discount) as revenue 
from
   lineitem 
where
   l_shipdate >= '1994-01-01' 
   and l_shipdate < date_add(to_date('1994-01-01'), 1, year) 
   and l_discount between 0.06 - 0.01 and 0.06 + 0.01 
   and l_quantity < 24;

Q7(OOM)

select
	supp_nation,
	cust_nation,
	l_year,
	sum(volume) as revenue
from
	(
	select
	n1.n_name as supp_nation,
	n2.n_name as cust_nation,
	extract(year from l_shipdate) as l_year,
	l_extendedprice * (1 - l_discount) as volume
	from
	supplier,
	lineitem,
	orders,
	customer,
	nation n1,
	nation n2
	where
	s_suppkey = l_suppkey
	and o_orderkey = l_orderkey
	and c_custkey = o_custkey
	and s_nationkey = n1.n_nationkey
	and c_nationkey = n2.n_nationkey
	and ((n1.n_name = 'JAPAN' and n2.n_name = 'INDIA')
	or (n1.n_name = 'INDIA' and n2.n_name = 'JAPAN'))
	and l_shipdate between '1995-01-01' and '1996-12-31'
	) as shipping
group by
	supp_nation,
	cust_nation,
	l_year
order by
	supp_nation,
	cust_nation,
	l_year;

Q8(OK)

select
	o_year,
	sum(case
		when nation = 'INDIA' then volume
		else 0
	end) / sum(volume) as mkt_share
from
	(
	select
	extract(year from o_orderdate) as o_year,
	l_extendedprice * (1 - l_discount) as volume,
	n2.n_name as nation
	from
	part,
	supplier,
	lineitem,
	orders,
	customer,
	nation n1,
	nation n2,
	region
	where
	p_partkey = l_partkey
	and s_suppkey = l_suppkey
	and l_orderkey = o_orderkey
	and o_custkey = c_custkey
	and c_nationkey = n1.n_nationkey
	and n1.n_regionkey = r_regionkey
	and r_name = 'ASIA'
	and s_nationkey = n2.n_nationkey
	and o_orderdate between '1995-01-01' and '1996-12-31'
	and p_type = 'SMALL PLATED COPPER'
	) as all_nations
group by
	o_year
order by
	o_year;

Q9(OK)

select
   nation,
   o_year,
   sum(amount) as sum_profit 
from
   (
      select
         n_name as nation,
         extract(year 
      from
         o_orderdate) as o_year,
         l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount 
      from
         part,
         supplier,
         lineitem,
         partsupp,
         orders,
         nation 
      where
         s_suppkey = l_suppkey 
         and ps_suppkey = l_suppkey 
         and ps_partkey = l_partkey 
         and p_partkey = l_partkey 
         and o_orderkey = l_orderkey 
         and s_nationkey = n_nationkey 
         and p_name like '%dim%' 
   )
   as profit 
group by
   nation,
   o_year 
order by
   nation,
   o_year desc;

Q10(OK)

select
   c_custkey,
   c_name,
   sum(l_extendedprice * (1 - l_discount)) as revenue,
   c_acctbal,
   n_name,
   c_address,
   c_phone,
   c_comment 
from
   customer,
   orders,
   lineitem,
   nation 
where
   c_custkey = o_custkey 
   and l_orderkey = o_orderkey 
   and o_orderdate >= '1993-08-01' 
   and o_orderdate < date_add(to_date('1993-08-01'), 3, month) 
   and l_returnflag = 'R' 
   and c_nationkey = n_nationkey 
group by
   c_custkey,
   c_name,
   c_acctbal,
   c_phone,
   n_name,
   c_address,
   c_comment 
order by
   revenue desc limit 20;

Q11(OK)

select
   ps_partkey,
   sum(ps_supplycost * ps_availqty) as value 
from
   partsupp,
   supplier,
   nation 
where
   ps_suppkey = s_suppkey 
   and s_nationkey = n_nationkey 
   and n_name = 'MOZAMBIQUE' 
group by
   ps_partkey 
having
   sum(ps_supplycost * ps_availqty) > ( 
   select
      sum(ps_supplycost * ps_availqty) * 0.0001000000 
   from
      partsupp, supplier, nation 
   where
      ps_suppkey = s_suppkey 
      and s_nationkey = n_nationkey 
      and n_name = 'MOZAMBIQUE' ) 
   order by
      value desc;

Q12(OK)

select
   l_shipmode,
   sum(
   case
      when
         o_orderpriority = '1-URGENT' 
         or o_orderpriority = '2-HIGH' 
      then
         1 
      else
         0 
   end
) as high_line_count, sum(
   case
      when
         o_orderpriority <> '1-URGENT' 
         and o_orderpriority <> '2-HIGH' 
      then
         1 
      else
         0 
   end
) as low_line_count 
from
   orders, lineitem 
where
   o_orderkey = l_orderkey 
   and l_shipmode in 
   (
      'RAIL', 'FOB'
   )
   and l_commitdate < l_receiptdate 
   and l_shipdate < l_commitdate 
   and l_receiptdate >= '1997-01-01' 
   and l_receiptdate < date_add(to_date('1997-01-01'), 1, year) 
group by
   l_shipmode 
order by
   l_shipmode;

Q13(OK)

select
   c_count,
   count(*) as custdist 
from
   (
      select
         c_custkey,
         count(o_orderkey) as c_count 
      from
         customer 
         left outer join
            orders 
            on c_custkey = o_custkey 
            and o_comment not like '%pending%deposits%' 
      group by
         c_custkey 
   )
   c_orders 
group by
   c_count 
order by
   custdist desc,
   c_count desc;

Q14(OK)

select
   100.00 * sum(
   case
      when
         p_type like 'PROMO%' 
      then
         l_extendedprice * (1 - l_discount) 
      else
         0 
   end
) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue 
from
   lineitem, part 
where
   l_partkey = p_partkey 
   and l_shipdate >= '1996-12-01' 
   and l_shipdate < date_add(to_date('1996-12-01'), 1, month);

Q15(Rewrite by @leiysky)

cselect
	supp_nation,
	cust_nation,
	l_year,
	sum(volume) as revenue
from
	(
		select
			n1.n_name as supp_nation,
			n2.n_name as cust_nation,
			extract(year from l_shipdate) as l_year,
			l_extendedprice * (1 - l_discount) as volume
		from
			orders,
			lineitem,
			supplier,
			customer,
			nation n1,
			nation n2
		where
			s_suppkey = l_suppkey
			and o_orderkey = l_orderkey
			and c_custkey = o_custkey
			and s_nationkey = n1.n_nationkey
			and c_nationkey = n2.n_nationkey
			and (
				(n1.n_name = 'FRANCE' and n2.n_name = 'GERMANY')
				or (n1.n_name = 'GERMANY' and n2.n_name = 'FRANCE')
			)
			and l_shipdate between to_date('1995-01-01') and to_date('1996-12-31')
	) as shipping
group by
	supp_nation,
	cust_nation,
	l_year
order by
	supp_nation,
	cust_nation,
	l_year;

Q16(Error)

select
   p_brand,
   p_type,
   p_size,
   count(distinct ps_suppkey) as supplier_cnt 
from
   partsupp,
   part 
where
   p_partkey = ps_partkey 
   and p_brand <> 'Brand#34' 
   and p_type not like 'LARGE BRUSHED%' 
   and p_size in 
   (
      48,
      19,
      12,
      4,
      41,
      7,
      21,
      39
   )
   and ps_suppkey not in 
   (
      select
         s_suppkey 
      from
         supplier 
      where
         s_comment like '%Customer%Complaints%' 
   )
group by
   p_brand,
   p_type,
   p_size 
order by
   supplier_cnt desc,
   p_brand,
   p_type,
   p_size;

Error:

ERROR 1105 (HY000): Code: 1002, displayText = Unsupported expr: InSubquery { span: [NOT(342..345), IN(346..348), LParen(353..354), SELECT(361..367), Ident(377..386), FROM(394..398), Ident(408..416), WHERE(424..429), Ident(439..448), LIKE(449..453), QuotedString(454..477), RParen(482..483)], expr: ColumnRef { span: [Ident(331..341)], database: None, table: None, column: Identifier { name: "ps_suppkey", quote: None, span: Ident(331..341) } }, subquery: Query { span: [SELECT(361..367), Ident(377..386), FROM(394..398), Ident(408.

Q17(OOM)

select
   sum(l_extendedprice) / 7.0 as avg_yearly 
from
   lineitem,
   part 
where
   p_partkey = l_partkey 
   and p_brand = 'Brand#44' 
   and p_container = 'WRAP PKG' 
   and l_quantity < ( 
   select
      0.2 * avg(l_quantity) 
   from
      lineitem 
   where
      l_partkey = p_partkey );

Q18(Error)

select
   c_name,
   c_custkey,
   o_orderkey,
   o_orderdate,
   o_totalprice,
   sum(l_quantity) 
from
   customer,
   orders,
   lineitem 
where
   o_orderkey in 
   (
      select
         l_orderkey 
      from
         lineitem 
      group by
         l_orderkey 
      having
         sum(l_quantity) > 314 
   )
   and c_custkey = o_custkey 
   and o_orderkey = l_orderkey 
group by
   c_name,
   c_custkey,
   o_orderkey,
   o_orderdate,
   o_totalprice 
order by
   o_totalprice desc,
   o_orderdate limit 100;

Error:

ERROR 1105 (HY000): Code: 1002, displayText = Unsupported expr: InSubquery { span: [IN(162..164), LParen(169..170), SELECT(177..183), Ident(193..203), FROM(211..215), Ident(225..233), GROUP(241..246), BY(247..249), Ident(259..269), HAVING(277..283), Ident(293..296), LParen(296..297), Ident(297..307), RParen(307..308), Gt(309..310), LiteralInteger(311..314), RParen(319..320)], expr: ColumnRef { span: [Ident(151..161)], database: None, table: None, column: Identifier { name: "o_orderkey", quote: None, span: Ident(151..161) } },

Q19(OOM)

select
   sum(l_extendedprice* (1 - l_discount)) as revenue 
from
   lineitem,
   part 
where
   (
      p_partkey = l_partkey 
      and p_brand = 'Brand#52' 
      and p_container in 
      (
         'SM CASE',
         'SM BOX',
         'SM PACK',
         'SM PKG'
      )
      and l_quantity >= 4 
      and l_quantity <= 4 + 10 
      and p_size between 1 and 5 
      and l_shipmode in 
      (
         'AIR',
         'AIR REG'
      )
      and l_shipinstruct = 'DELIVER IN PERSON' 
   )
   or 
   (
      p_partkey = l_partkey 
      and p_brand = 'Brand#11' 
      and p_container in 
      (
         'MED BAG',
         'MED BOX',
         'MED PKG',
         'MED PACK'
      )
      and l_quantity >= 18 
      and l_quantity <= 18 + 10 
      and p_size between 1 and 10 
      and l_shipmode in 
      (
         'AIR',
         'AIR REG'
      )
      and l_shipinstruct = 'DELIVER IN PERSON' 
   )
   or 
   (
      p_partkey = l_partkey 
      and p_brand = 'Brand#51' 
      and p_container in 
      (
         'LG CASE',
         'LG BOX',
         'LG PACK',
         'LG PKG'
      )
      and l_quantity >= 29 
      and l_quantity <= 29 + 10 
      and p_size between 1 and 15 
      and l_shipmode in 
      (
         'AIR',
         'AIR REG'
      )
      and l_shipinstruct = 'DELIVER IN PERSON' 
   )
;

Q20(Error)

select
   s_name,
   s_address 
from
   supplier,
   nation 
where
   s_suppkey in 
   (
      select
         ps_suppkey 
      from
         partsupp 
      where
         ps_partkey in 
         (
            select
               p_partkey 
            from
               part 
            where
               p_name like 'green%' 
         )
         and ps_availqty > ( 
         select
            0.5 * sum(l_quantity) 
         from
            lineitem 
         where
            l_partkey = ps_partkey 
            and l_suppkey = ps_suppkey 
            and l_shipdate >= '1993-01-01' 
            and l_shipdate < date_add(to_date('1993-01-01'), 1, year) ) 
   )
   and s_nationkey = n_nationkey 
   and n_name = 'ALGERIA' 
order by
   s_name;

Error:

ERROR 1105 (HY000): Code: 1002, displayText = Unsupported expr: InSubquery { span: [IN(80..82), LParen(87..88), SELECT(95..101), Ident(111..121), FROM(129..133), Ident(143..151), WHERE(159..164), Ident(174..184), IN(185..187), LParen(198..199), SELECT(212..218), Ident(234..243), FROM(257..261), Ident(277..281), WHERE(295..300), Ident(316..322), LIKE(323..327), QuotedString(328..336), RParen(347..348), AND(358..361), Ident(362..373), Gt(374..375), LParen(376..377), SELECT(388..394), LiteralFloat(407..410), Multiply(411..412), 

Q21(OOM)

select
   s_name,
   count(*) as numwait 
from
   supplier,
   lineitem l1,
   orders,
   nation 
where
   s_suppkey = l1.l_suppkey 
   and o_orderkey = l1.l_orderkey 
   and o_orderstatus = 'F' 
   and l1.l_receiptdate > l1.l_commitdate 
   and exists 
   (
      select
         * 
      from
         lineitem l2 
      where
         l2.l_orderkey = l1.l_orderkey 
         and l2.l_suppkey <> l1.l_suppkey 
   )
   and not exists 
   (
      select
         * 
      from
         lineitem l3 
      where
         l3.l_orderkey = l1.l_orderkey 
         and l3.l_suppkey <> l1.l_suppkey 
         and l3.l_receiptdate > l3.l_commitdate 
   )
   and s_nationkey = n_nationkey 
   and n_name = 'EGYPT' 
group by
   s_name 
order by
   numwait desc,
   s_name limit 100;

Q22(OK)

select
   cntrycode,
   count(*) as numcust,
   sum(c_acctbal) as totacctbal 
from
   (
      select
         substring(c_phone 
      from
         1 for 2) as cntrycode,
         c_acctbal 
      from
         customer 
      where
         substring(c_phone 
      from
         1 for 2) in 
         (
            '20',
            '40',
            '22',
            '30',
            '39',
            '42',
            '21'
         )
         and c_acctbal > ( 
         select
            avg(c_acctbal) 
         from
            customer 
         where
            c_acctbal > 0.00 
            and substring(c_phone 
         from
            1 for 2) in 
            (
               '20',
               '40',
               '22',
               '30',
               '39',
               '42',
               '21'
            )
) 
            and not exists 
            (
               select
                  * 
               from
                  orders 
               where
                  o_custkey = c_custkey 
            )
   )
   as custsale 
group by
   cntrycode 
order by
   cntrycode;
@BohuTANG
Copy link
Member Author

Queries are referenced to https://github.com/pingcap/tidb/blob/master/cmd/explaintest/t/tpch.test
This is the first ALL test of TPCH, cc @leiysky @xudong963 @andylokandy

@leiysky
Copy link
Contributor

leiysky commented Jun 20, 2022

#6073 will fix the Projection error of Q4 and Q22.

@BohuTANG
Copy link
Member Author

Q15:

create view revenue0 (supplier_no, total_revenue) 

seems like Tabular SQL UDFs (UDTFs)

I think we can skip this test and wait until the UDTFs are ready.

@leiysky
Copy link
Contributor

leiysky commented Jun 20, 2022

Q15:

create view revenue0 (supplier_no, total_revenue) 

seems like Tabular SQL UDFs (UDTFs)

I think we can skip this test and wait until the UDTFs are ready.

It's actually a alias, we can rewrite as:

create view revenue0 as
	select
		l_suppkey as supplier_no,
		sum(l_extendedprice * (1 - l_discount)) as total_revenue
	from
		lineitem
	where
		l_shipdate >= to_date('1996-01-01')
		and l_shipdate < addMonths(to_date('1996-01-01'), 3)
	group by
		l_suppkey;

@leiysky
Copy link
Contributor

leiysky commented Jun 21, 2022

The error of Q5 is fixed by #6095

@xudong963
Copy link
Member

Q13 is supported by #6145

@xudong963
Copy link
Member

Q7 is supported but needs much time to get the result @BohuTANG
It involves multiple tables of different sizes to join, and we can introduce a join reorder optimization rule to optimize it

mysql> source tpch-q7.sql
+-------------+-------------+--------+-------------------+
| supp_nation | cust_nation | l_year | revenue           |
+-------------+-------------+--------+-------------------+
| FRANCE      | GERMANY     |   1995 | 54639732.73359999 |
| FRANCE      | GERMANY     |   1996 | 54633083.30759999 |
| GERMANY     | FRANCE      |   1995 | 52531746.66969999 |
| GERMANY     | FRANCE      |   1996 |     52520549.0224 |
+-------------+-------------+--------+-------------------+
4 rows in set (11 min 49.75 sec)
Read 7661265 rows, 1.12 GiB in 709.664 sec., 10.8 thousand rows/sec., 1.61 MiB/sec.

@xudong963
Copy link
Member

Result:

OK: Q1, Q3, Q6, Q8, Q9, Q10, Q11, Q12, Q14, Q4, Q22, Q13, Q5 OOM: Q2, Q17, Q19, Q21 Error: Q4, Q5, Q7, Q13, Q15, Q16, Q18, Q20, Q22

Update the result, beside Q15, we only have seven queries left that didn't pass

@leiysky
Copy link
Contributor

leiysky commented Jun 23, 2022

It involves multiple tables of different sizes to join,

This can be rewritten into:

select
	supp_nation,
	cust_nation,
	l_year,
	sum(volume) as revenue
from
	(
		select
			n1.n_name as supp_nation,
			n2.n_name as cust_nation,
			extract(year from l_shipdate) as l_year,
			l_extendedprice * (1 - l_discount) as volume
		from
			orders,
			lineitem,
			supplier,
			customer,
			nation n1,
			nation n2
		where
			s_suppkey = l_suppkey
			and o_orderkey = l_orderkey
			and c_custkey = o_custkey
			and s_nationkey = n1.n_nationkey
			and c_nationkey = n2.n_nationkey
			and (
				(n1.n_name = 'FRANCE' and n2.n_name = 'GERMANY')
				or (n1.n_name = 'GERMANY' and n2.n_name = 'FRANCE')
			)
			and l_shipdate between to_date('1995-01-01') and to_date('1996-12-31')
	) as shipping
group by
	supp_nation,
	cust_nation,
	l_year
order by
	supp_nation,
	cust_nation,
	l_year;

Which is much more effiecient than original query.

Join reordering is complicated and has dependency on cost-based optmization, we will implement this later.

@BohuTANG
Copy link
Member Author

I will test TPCH again today later :/

@xudong963
Copy link
Member

This can be rewritten into:

Yes, I have already rewritten it, so I got the result 🤣

@xudong963
Copy link
Member

xudong963 commented Jun 23, 2022

I will test TPCH again today later :/

For Q17 and Q21, you can try to increase your max open file descriptors in your local @BohuTANG
image

@xudong963
Copy link
Member

Btw, Q19 isn't OOM, I'm debugging it

@leiysky
Copy link
Contributor

leiysky commented Jun 23, 2022

Btw, Q19 isn't OOM, I'm debugging it

It's because of the disjuntion, I have opened a issue for this #6096

@BohuTANG
Copy link
Member Author

Re-run the test for the TPCH test, and update the result.
We can add a TPCH stateless test suite for the TPCH with planner_v2? cc @leiysky @xudong963

@xudong963
Copy link
Member

xudong963 commented Jun 23, 2022

We can add a TPCH stateless test suite for the TPCH with planner_v2?

Good idea, but some queries may cost too much time, such as Q7. 🤔

@xudong963
Copy link
Member

xudong963 commented Jun 23, 2022

Here I update my test results today and categorize them:

  • Q17, Q21, Q2: subquery processing is too slow, can run the results, but it takes a long time
  • Q19 There is currently a downcast type problem, after processing it should not be able to run the results, the filter is too heavy
  • Q16, Q18, Q20: InSubquery is not supported.

cc @BohuTANG @leiysky

@BohuTANG
Copy link
Member Author

@xudong963

The test suite is mainly for the TPCH functional check, not the performance, so we can make the factor to 0.1:

docker run -it -v "$(pwd)":/data ghcr.io/databloom-ai/tpch-docker:main dbgen -vf -s 0.1

@xudong963
Copy link
Member

@xudong963

The test suite is mainly for the TPCH functional check, not the performance, so we can make the factor to 0.1:

docker run -it -v "$(pwd)":/data ghcr.io/databloom-ai/tpch-docker:main dbgen -vf -s 0.1

#6183

@leiysky leiysky moved this to 📒Backlog in Databend Query Planner Jun 24, 2022
This was referenced Jul 2, 2022
@leiysky leiysky moved this from 📒Backlog to 📖In Progress in Databend Query Planner Jul 8, 2022
@BohuTANG
Copy link
Member Author

BohuTANG commented Jul 9, 2022

Update:
Q16 and Q20 works, but Q18 has a internal error:

mysql> select
    ->    c_name,
    ->    c_custkey,
    ->    o_orderkey,
    ->    o_orderdate,
    ->    o_totalprice,
    ->    sum(l_quantity) 
    -> from
    ->    customer,
    ->    orders,
    ->    lineitem 
    -> where
    ->    o_orderkey in 
    ->    (
    ->       select
    ->          l_orderkey 
    ->       from
    ->          lineitem 
    ->       group by
    ->          l_orderkey 
    ->       having
    ->          sum(l_quantity) > 314 
    ->    )
    ->    and c_custkey = o_custkey 
    ->    and o_orderkey = l_orderkey 
    -> group by
    ->    c_name,
    ->    c_custkey,
    ->    o_orderkey,
    ->    o_orderdate,
    ->    o_totalprice 
    -> order by
    ->    o_totalprice desc,
    ->    o_orderdate limit 100;
ERROR 1105 (HY000): Code: 1002, displayText = missing field `unset_bits` at line 1 column 1337.

Server log:

2022-07-09T09:38:55.214087Z  INFO databend_query::servers::mysql::mysql_interactive_worker: Normal query: select    c_name,    c_custkey,    o_orderkey,    o_orderdate,    o_totalprice,    sum(l_quantity)  from    customer,    orders,    lineitem  where    o_orderkey in     (       select          l_orderkey        from          lineitem        group by          l_orderkey        having          sum(l_quantity) > 314     )    and c_custkey = o_custkey     and o_orderkey = l_orderkey  group by    c_name,    c_custkey,    o_orderkey,    o_orderdate,    o_totalprice  order by    o_totalprice desc,    o_orderdate limit 100
2022-07-09T09:38:56.050064Z ERROR databend_query::servers::mysql::writers::query_result_writer: OnQuery Error: Code: 1002, displayText = missing field `unset_bits` at line 1 column 1337.

   0: common_exception::exception::ErrorCode::from_std_error
             at ./common/exception/src/exception.rs:188:65
   1: common_exception::exception_into::<impl core::convert::From<serde_json::error::Error> for common_exception::exception::ErrorCode>::from
             at ./common/exception/src/exception_into.rs:105:9
   2: <core::result::Result<T,F> as core::ops::try_trait::FromResidual<core::result::Result<core::convert::Infallible,E>>>::from_residual
             at /rustc/ddcbba036aee08f0709f98a92a342a278eae5c05/library/core/src/result.rs:2125:27
   3: databend_query::storages::fuse::io::read::versioned_reader::load::{{closure}}
             at ./query/src/storages/fuse/io/read/versioned_reader.rs:65:8
   4: <core::future::from_generator::GenFuture<T> as core::future::future::Future>::poll
             at /rustc/ddcbba036aee08f0709f98a92a342a278eae5c05/library/core/src/future/mod.rs:91:19
   5: <databend_query::storages::fuse::meta::versions::SnapshotVersion as databend_query::storages::fuse::io::read::versioned_reader::VersionedReader<databend_query::storages::fuse::meta::v1::snapshot::TableSnapshot>>::read::{{closure}}
             at ./query/src/storages/fuse/io/read/versioned_reader.rs:38:54
   6: <core::future::from_generator::GenFuture<T> as core::future::future::Future>::poll
             at /rustc/ddcbba036aee08f0709f98a92a342a278eae5c05/library/core/src/future/mod.rs:91:19
   7: <core::pin::Pin<P> as core::future::future::Future>::poll
             at /rustc/ddcbba036aee08f0709f98a92a342a278eae5c05/library/core/src/future/future.rs:124:9
   8: databend_query::storages::fuse::io::read::meta_readers::<impl databend_query::storages::fuse::io::read::cached_reader::Loader<databend_query::storages::fuse::meta::v1::snapshot::TableSnapshot> for T>::load::{{closure}}
             at ./query/src/storages/fuse/io/read/meta_readers.rs:171:29
   9: <core::future::from_generator::GenFuture<T> as core::future::future::Future>::poll
             at /rustc/ddcbba036aee08f0709f98a92a342a278eae5c05/library/core/src/future/mod.rs:91:19
  10: <core::pin::Pin<P> as core::future::future::Future>::poll
             at /rustc/ddcbba036aee08f0709f98a92a342a278eae5c05/library/core/src/future/future.rs:124:9
  11: databend_query::storages::fuse::io::read::cached_reader::CachedReader<T,L>::load::{{closure}}::{{closure}}::{{closure}}
             at ./query/src/storages/fuse/io/read/cached_reader.rs:107:46
  12: <core::future::from_generator::GenFuture<T> as core::future::future::Future>::poll
             at /rustc/ddcbba036aee08f0709f98a92a342a278eae5c05/library/core/src/future/mod.rs:91:19
  13: <backoff::future::Retry<S,B,N,Fn,Fut> as core::future::future::Future>::poll
             at /home/bohu/.cargo/registry/src/git.luolix.top-1ecc6299db9ec823/backoff-0.4.0/src/future.rs:182:26
  14: databend_query::storages::fuse::io::read::cached_reader::CachedReader<T,L>::load::{{closure}}
             at ./query/src/storages/fuse/io/read/cached_reader.rs:118:47
  15: <core::future::from_generator::GenFuture<T> as core::future::future::Future>::poll
             at /rustc/ddcbba036aee08f0709f98a92a342a278eae5c05/library/core/src/future/mod.rs:91:19
  16: databend_query::storages::fuse::io::read::cached_reader::CachedReader<T,L>::read::{{closure}}
             at ./query/src/storages/fuse/io/read/cached_reader.rs:66:68
  17: <core::future::from_generator::GenFuture<T> as core::future::future::Future>::poll
             at /rustc/ddcbba036aee08f0709f98a92a342a278eae5c05/library/core/src/future/mod.rs:91:19
  18: databend_query::storages::fuse::fuse_table::FuseTable::read_table_snapshot::{{closure}}::{{closure}}
             at ./query/src/storages/fuse/fuse_table.rs:128:57
  19: <core::future::from_generator::GenFuture<T> as core::future::future::Future>::poll
             at /rustc/ddcbba036aee08f0709f98a92a342a278eae5c05/library/core/src/future/mod.rs:91:19
  20: databend_query::storages::fuse::fuse_table::FuseTable::read_table_snapshot::{{closure}}
             at ./query/src/storages/fuse/fuse_table.rs:120:5
  21: <core::future::from_generator::GenFuture<T> as core::future::future::Future>::poll
             at /rustc/ddcbba036aee08f0709f98a92a342a278eae5c05/library/core/src/future/mod.rs:91:19
  22: databend_query::storages::fuse::operations::read_partitions::<impl databend_query::storages::fuse::fuse_table::FuseTable>::do_read_partitions::{{closure}}
             at ./query/src/storages/fuse/operations/read_partitions.rs:40:62
  23: <core::future::from_generator::GenFuture<T> as core::future::future::Future>::poll
             at /rustc/ddcbba036aee08f0709f98a92a342a278eae5c05/library/core/src/future/mod.rs:91:19
  24: <databend_query::storages::fuse::fuse_table::FuseTable as databend_query::storages::storage_table::Table>::read_partitions::{{closure}}::{{closure}}
             at ./query/src/storages/fuse/fuse_table.rs:345:49
  25: <core::future::from_generator::GenFuture<T> as core::future::future::Future>::poll
             at /rustc/ddcbba036aee08f0709f98a92a342a278eae5c05/library/core/src/future/mod.rs:91:19
  26: <databend_query::storages::fuse::fuse_table::FuseTable as databend_query::storages::storage_table::Table>::read_partitions::{{closure}}
             at ./query/src/storages/fuse/fuse_table.rs:339:5
  27: <core::future::from_generator::GenFuture<T> as core::future::future::Future>::poll
             at /rustc/ddcbba036aee08f0709f98a92a342a278eae5c05/library/core/src/future/mod.rs:91:19
  28: <core::pin::Pin<P> as core::future::future::Future>::poll
             at /rustc/ddcbba036aee08f0709f98a92a342a278eae5c05/library/core/src/future/future.rs:124:9
  29: <dyn databend_query::storages::storage_table::Table as databend_query::storages::storage_table_read_plan::ToReadDataSourcePlan>::read_plan_with_catalog::{{closure}}
             at ./query/src/storages/storage_table_read_plan.rs:57:80
  30: <core::future::from_generator::GenFuture<T> as core::future::future::Future>::poll
             at /rustc/ddcbba036aee08f0709f98a92a342a278eae5c05/library/core/src/future/mod.rs:91:19
  31: <core::pin::Pin<P> as core::future::future::Future>::poll
             at /rustc/ddcbba036aee08f0709f98a92a342a278eae5c05/library/core/src/future/future.rs:124:9
  32: databend_query::sql::planner::binder::table::<impl databend_query::sql::planner::binder::Binder>::bind_table_reference::{{closure}}
             at ./query/src/sql/planner/binder/table.rs:150:93
  33: <core::future::from_generator::GenFuture<T> as core::future::future::Future>::poll
             at /rustc/ddcbba036aee08f0709f98a92a342a278eae5c05/library/core/src/future/mod.rs:91:19
  34: databend_query::sql::planner::binder::join::<impl databend_query::sql::planner::binder::Binder>::bind_join::{{closure}}
             at ./query/src/sql/planner/binder/join.rs:51:64
  35: <core::future::from_generator::GenFuture<T> as core::future::future::Future>::poll
             at /rustc/ddcbba036aee08f0709f98a92a342a278eae5c05/library/core/src/future/mod.rs:91:19
  36: <core::pin::Pin<P> as core::future::future::Future>::poll
             at /rustc/ddcbba036aee08f0709f98a92a342a278eae5c05/library/core/src/future/future.rs:124:9
  37: databend_query::sql::planner::binder::table::<impl databend_query::sql::planner::binder::Binder>::bind_table_reference::{{closure}}
             at ./query/src/sql/planner/binder/table.rs:222:89
  38: <core::future::from_generator::GenFuture<T> as core::future::future::Future>::poll
             at /rustc/ddcbba036aee08f0709f98a92a342a278eae5c05/library/core/src/future/mod.rs:91:19
  39: databend_query::sql::planner::binder::join::<impl databend_query::sql::planner::binder::Binder>::bind_join::{{closure}}
             at ./query/src/sql/planner/binder/join.rs:51:64
  40: <core::future::from_generator::GenFuture<T> as core::future::future::Future>::poll
             at /rustc/ddcbba036aee08f0709f98a92a342a278eae5c05/library/core/src/future/mod.rs:91:19
  41: <core::pin::Pin<P> as core::future::future::Future>::poll
             at /rustc/ddcbba036aee08f0709f98a92a342a278eae5c05/library/core/src/future/future.rs:124:9
  42: databend_query::sql::planner::binder::table::<impl databend_query::sql::planner::binder::Binder>::bind_table_reference::{{closure}}
             at ./query/src/sql/planner/binder/table.rs:222:89
  43: <core::future::from_generator::GenFuture<T> as core::future::future::Future>::poll
             at /rustc/ddcbba036aee08f0709f98a92a342a278eae5c05/library/core/src/future/mod.rs:91:19
  44: databend_query::sql::planner::binder::select::<impl databend_query::sql::planner::binder::Binder>::bind_select_stmt::{{closure}}
             at ./query/src/sql/planner/binder/select.rs:80:66
  45: <core::future::from_generator::GenFuture<T> as core::future::future::Future>::poll
             at /rustc/ddcbba036aee08f0709f98a92a342a278eae5c05/library/core/src/future/mod.rs:91:19
  46: databend_query::sql::planner::binder::select::<impl databend_query::sql::planner::binder::Binder>::bind_set_expr::{{closure}}
             at ./query/src/sql/planner/binder/select.rs:164:89
  47: <core::future::from_generator::GenFuture<T> as core::future::future::Future>::poll
             at /rustc/ddcbba036aee08f0709f98a92a342a278eae5c05/library/core/src/future/mod.rs:91:19
  48: <core::pin::Pin<P> as core::future::future::Future>::poll
             at /rustc/ddcbba036aee08f0709f98a92a342a278eae5c05/library/core/src/future/future.rs:124:9
  49: databend_query::sql::planner::binder::select::<impl databend_query::sql::planner::binder::Binder>::bind_query::{{closure}}
             at ./query/src/sql/planner/binder/select.rs:186:79
  50: <core::future::from_generator::GenFuture<T> as core::future::future::Future>::poll
             at /rustc/ddcbba036aee08f0709f98a92a342a278eae5c05/library/core/src/future/mod.rs:91:19
  51: databend_query::sql::planner::binder::Binder::bind_statement::{{closure}}
             at ./query/src/sql/planner/binder/mod.rs:101:82
  52: <core::future::from_generator::GenFuture<T> as core::future::future::Future>::poll
             at /rustc/ddcbba036aee08f0709f98a92a342a278eae5c05/library/core/src/future/mod.rs:91:19
  53: <core::pin::Pin<P> as core::future::future::Future>::poll
             at /rustc/ddcbba036aee08f0709f98a92a342a278eae5c05/library/core/src/future/future.rs:124:9
  54: databend_query::sql::planner::binder::Binder::bind::{{closure}}
             at ./query/src/sql/planner/binder/mod.rs:90:54
  55: <core::future::from_generator::GenFuture<T> as core::future::future::Future>::poll
             at /rustc/ddcbba036aee08f0709f98a92a342a278eae5c05/library/core/src/future/mod.rs:91:19
  56: databend_query::sql::planner::Planner::plan_sql::{{closure}}
             at ./query/src/sql/planner/mod.rs:62:38
  57: <core::future::from_generator::GenFuture<T> as core::future::future::Future>::poll
             at /rustc/ddcbba036aee08f0709f98a92a342a278eae5c05/library/core/src/future/mod.rs:91:19
  58: databend_query::servers::mysql::mysql_interactive_worker::InteractiveWorkerBase<W>::do_query::{{closure}}::{{closure}}
             at ./query/src/servers/mysql/mysql_interactive_worker.rs:308:45
  59: <core::future::from_generator::GenFuture<T> as core::future::future::Future>::poll
             at /rustc/ddcbba036aee08f0709f98a92a342a278eae5c05/library/core/src/future/mod.rs:91:19
  60: databend_query::servers::mysql::mysql_interactive_worker::InteractiveWorkerBase<W>::do_query::{{closure}}
             at ./query/src/servers/mysql/mysql_interactive_worker.rs:278:5
  61: <core::future::from_generator::GenFuture<T> as core::future::future::Future>::poll
             at /rustc/ddcbba036aee08f0709f98a92a342a278eae5c05/library/core/src/future/mod.rs:91:19
  62: <databend_query::servers::mysql::mysql_interactive_worker::InteractiveWorker<W> as opensrv_mysql::AsyncMysqlShim<W>>::on_query::{{closure}}

@BohuTANG
Copy link
Member Author

BohuTANG commented Jul 9, 2022

For the last one Q18 I post a new issue here: #6556, close this issue.

@BohuTANG BohuTANG closed this as completed Jul 9, 2022
Repository owner moved this from 📖In Progress to 📕Done in Databend Query Planner Jul 9, 2022
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
A-planner Area: planner/optimizer C-testing Category: testing
Projects
Status: 📕Done
Development

No branches or pull requests

3 participants