Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ao related commits cherry-pick #884

Merged
merged 5 commits into from
Jan 29, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/backend/access/appendonly/appendonlyam.c
Original file line number Diff line number Diff line change
Expand Up @@ -2897,7 +2897,7 @@ appendonly_insert(AppendOnlyInsertDesc aoInsertDesc,
*/
if (need_toast)
tup = memtup_toast_insert_or_update(relation, instup,
NULL, aoInsertDesc->mt_bind, 0);
NULL, aoInsertDesc->mt_bind, aoInsertDesc->toast_tuple_target, 0);
else
tup = instup;

Expand Down
5 changes: 4 additions & 1 deletion src/backend/access/bitmap/bitmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -890,6 +890,9 @@ copy_scan_desc(IndexScanDesc scan)
*
* If newentry is false, we're calling the function with a partially filled
* page table entry. Otherwise, the entry is empty.
 *
 * This function is only used by the stream bitmap scan, i.e. the
 * BitmapIndexScan + BitmapHeapScan combination.
*/

static bool
Expand Down Expand Up @@ -960,7 +963,7 @@ words_get_match(BMBatchWords *words, BMIterateResult *result,
*/
if (words->firstTid < result->nextTid)
{
Assert(words->nwords < 1);
Assert(words->nwords == 0);
return false;
}

Expand Down
24 changes: 18 additions & 6 deletions src/backend/access/bitmap/bitmaputil.c
Original file line number Diff line number Diff line change
Expand Up @@ -260,9 +260,23 @@ _bitmap_findnexttids(BMBatchWords *words, BMIterateResult *result,

result->nextTidLoc = result->numOfTids = 0;

_bitmap_catchup_to_next_tid(words, result);

Assert(words->firstTid == result->nextTid);
/*
 * Adjust result->lastScanWordNo only in the case where concurrent reads and
 * writes touch two adjacent bitmap index pages: an insert of a tid into the
 * full page (PAGE_FULL) can expand its compressed words into new words and
 * spill the rearranged words onto the next page (PAGE_NEXT). When we are
 * about to read that new page, we must catch lastScanWordNo up to the
 * current position.
 *
 * words->startNo is always 0 here; the value is only meaningfully used by
 * _bitmap_union, which stores the union of a set of bitmaps in 'words'.
 * result->lastScanWordNo is the index into words->cwords of the next word
 * BMIterateResult will read; it starts at 0 and increments as the scan
 * proceeds. So result->lastScanWordNo == words->startNo means we are about
 * to scan a new bitmap index page.
 */
if (result->lastScanWordNo == words->startNo &&
words->firstTid < result->nextTid)
_bitmap_catchup_to_next_tid(words, result);

while (words->nwords > 0 && result->numOfTids < maxTids && !done)
{
Expand Down Expand Up @@ -358,10 +372,8 @@ _bitmap_findnexttids(BMBatchWords *words, BMIterateResult *result,

/*
* _bitmap_catchup_to_next_tid - Catch up to the nextTid we need to check
* from last iteration.
* from last iteration, in the following cases:
*
* Normally words->firstTid should equal to result->nextTid. But there
* are exceptions:
* 1: When the concurrent insert causes bitmap items from previous full page
* to spill over to current page in the window when we (the read transaction)
* had released the lock on the previous page and not locked the current page.
Expand Down
54 changes: 54 additions & 0 deletions src/backend/access/common/reloptions_gp.c
Original file line number Diff line number Diff line change
Expand Up @@ -1968,3 +1968,57 @@ ao_amoptions(Datum reloptions, char relkind, bool validate)
return NULL;
}
}

/*
* GPDB: Convenience function to judge a relation option whether already in opts
*/
bool
reloptions_has_opt(List *opts, const char *name)
{
ListCell *lc;
foreach(lc, opts)
{
DefElem *de = lfirst(lc);
if (pg_strcasecmp(de->defname, name) == 0)
return true;
}
return false;
}

/*
* GPDB: Convenience function to build storage reloptions for a given relation, just for AO table.
*/
List *
build_ao_rel_storage_opts(List *opts, Relation rel)
{
bool checksum = true;
int32 blocksize = -1;
int16 compresslevel = 0;
char *compresstype = NULL;
NameData compresstype_nd;

GetAppendOnlyEntryAttributes(RelationGetRelid(rel),
&blocksize,
NULL,
&compresslevel,
&checksum,
&compresstype_nd);
compresstype = NameStr(compresstype_nd);

if (!reloptions_has_opt(opts, "blocksize"))
opts = lappend(opts, makeDefElem("blocksize", (Node *) makeInteger(blocksize), -1));

if (!reloptions_has_opt(opts, "compresslevel"))
opts = lappend(opts, makeDefElem("compresslevel", (Node *) makeInteger(compresslevel), -1));

if (!reloptions_has_opt(opts, "checksum"))
opts = lappend(opts, makeDefElem("checksum", (Node *) makeInteger(checksum), -1));

if (!reloptions_has_opt(opts, "compresstype"))
{
compresstype = (compresstype && compresstype[0]) ? pstrdup(compresstype) : "none";
opts = lappend(opts, makeDefElem("compresstype", (Node *) makeString(compresstype), -1));
}

return opts;
}
26 changes: 16 additions & 10 deletions src/backend/access/heap/heaptoast.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@

static void *
heap_toast_insert_or_update_generic(Relation rel, void *newtup, void *oldtup,
MemTupleBinding *pbind, int options, bool ismemtuple);
MemTupleBinding *pbind, int options, int toast_tuple_target, bool ismemtuple);

/* ----------
* heap_toast_delete -
Expand Down Expand Up @@ -100,18 +100,18 @@ heap_toast_delete(Relation rel, HeapTuple oldtup, bool is_speculative)
HeapTuple
heap_toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, int options)
{
return (HeapTuple) heap_toast_insert_or_update_generic(rel, newtup, oldtup, NULL, options, false);
return (HeapTuple) heap_toast_insert_or_update_generic(rel, newtup, oldtup, NULL, options, TOAST_TUPLE_TARGET, false);
}

MemTuple memtup_toast_insert_or_update(Relation rel, MemTuple newtup, MemTuple oldtup,
MemTupleBinding *pbind, int options)
MemTupleBinding *pbind, int toast_tuple_target, int options)
{
return (MemTuple) heap_toast_insert_or_update_generic(rel, newtup, oldtup, pbind, options, true);
return (MemTuple) heap_toast_insert_or_update_generic(rel, newtup, oldtup, pbind, toast_tuple_target, options, true);
}

static void *
heap_toast_insert_or_update_generic(Relation rel, void *newtup, void *oldtup,
MemTupleBinding *pbind, int options, bool ismemtuple)
MemTupleBinding *pbind, int options, int toast_tuple_target, bool ismemtuple)
{
void *result_gtuple;
TupleDesc tupleDesc;
Expand Down Expand Up @@ -210,10 +210,8 @@ heap_toast_insert_or_update_generic(Relation rel, void *newtup, void *oldtup,
}
else
{
/* Since reloptions for AO table is not permitted, so using TOAST_TUPLE_TARGET */
hoff = sizeof(MemTupleData);
hoff = MAXALIGN(hoff);
maxDataLen = TOAST_TUPLE_TARGET - hoff;
maxDataLen = toast_tuple_target;
hoff = -1; /* keep compiler quiet about using 'hoff' uninitialized */
}

/*
Expand Down Expand Up @@ -295,7 +293,15 @@ heap_toast_insert_or_update_generic(Relation rel, void *newtup, void *oldtup,
* increase the target tuple size, so that MAIN attributes aren't stored
* externally unless really necessary.
*/
maxDataLen = TOAST_TUPLE_TARGET_MAIN - hoff;
/*
* FIXME: Should we do something like this with memtuples on
* AO tables too? Currently we do not increase the target tuple size for AO
* table, so there are occasions when columns of type 'm' will be stored
* out-of-line but they could otherwise be accommodated in-block
* c.f. upstream Postgres commit ca7c8168de76459380577eda56a3ed09b4f6195c
*/
if (!ismemtuple)
maxDataLen = TOAST_TUPLE_TARGET_MAIN - hoff;

while (heap_compute_data_size(tupleDesc,
toast_values, toast_isnull) > maxDataLen &&
Expand Down
14 changes: 8 additions & 6 deletions src/backend/catalog/namespace.c
Original file line number Diff line number Diff line change
Expand Up @@ -3487,17 +3487,19 @@ SetTempNamespaceState(Oid tempNamespaceId, Oid tempToastNamespaceId)
/*
* like SetTempNamespaceState, but the process running normally
*
* GPDB: used to set session level temporary namespace after gang launched.
* GPDB: used to set session level temporary namespace after reader gang launched.
*/
void
SetTempNamespaceStateAfterBoot(Oid tempNamespaceId, Oid tempToastNamespaceId)
{
/* same as PG, can not switch to other temp namespace dynamically */
Assert(myTempNamespace == InvalidOid || myTempNamespace == tempNamespaceId);
Assert(myTempToastNamespace == InvalidOid || myTempToastNamespace == tempToastNamespaceId);
Assert(Gp_role == GP_ROLE_EXECUTE);

/* if the namespace OID already setted, baseSearchPath is still valid */
if (myTempNamespace == tempToastNamespaceId && myTempToastNamespace == tempToastNamespaceId)
/* writer gang will do InitTempTableNamespace(), ignore the dispatch on writer gang */
if (Gp_is_writer)
return;

/* skip rebuild search path if search path is correct and valid */
if (tempNamespaceId == myTempNamespace && myTempToastNamespace == tempToastNamespaceId)
return;

myTempNamespace = tempNamespaceId;
Expand Down
2 changes: 1 addition & 1 deletion src/backend/cdb/dispatcher/cdbdisp_query.c
Original file line number Diff line number Diff line change
Expand Up @@ -1021,7 +1021,7 @@ buildGpQueryString(DispatchCommandQueryParms *pQueryParms,
pos += resgroupInfo.len;
}

/* in-process variable for temporary namespace */
/* pass process local variables to QEs */
GetTempNamespaceState(&tempNamespaceId, &tempToastNamespaceId);
tempNamespaceId = htonl(tempNamespaceId);
tempToastNamespaceId = htonl(tempToastNamespaceId);
Expand Down
15 changes: 14 additions & 1 deletion src/backend/commands/tablecmds.c
Original file line number Diff line number Diff line change
Expand Up @@ -17888,7 +17888,7 @@ build_ctas_with_dist(Relation rel, DistributedBy *dist_clause,

pre_built = prebuild_temp_table(rel, tmprel, dist_clause,
get_am_name(rel->rd_rel->relam),
storage_opts,
RelationIsAppendOptimized(rel) ? build_ao_rel_storage_opts(NIL, rel) : storage_opts,
RelationIsAppendOptimized(rel),
useExistingColumnAttributes);
if (pre_built)
Expand All @@ -17912,6 +17912,19 @@ build_ctas_with_dist(Relation rel, DistributedBy *dist_clause,
into->options = storage_opts;
into->tableSpaceName = get_tablespace_name(tblspc);
into->distributedBy = (Node *)dist_clause;
if (RelationIsAppendOptimized(rel))
{
/*
 * Re-build the storage options from the original table so that the result
 * is not affected by the gp_default_storage_options GUC.
 *
 * When a table is created with default storage parameters, those settings
 * are not written to pg_class.reloptions.  If gp_default_storage_options is
 * later modified, a table newly created from such options would then be
 * inconsistent with the original table.
 */
into->options = build_ao_rel_storage_opts(into->options, rel);
}
s->intoClause = into;

RawStmt *rawstmt = makeNode(RawStmt);
Expand Down
2 changes: 1 addition & 1 deletion src/backend/tcop/postgres.c
Original file line number Diff line number Diff line change
Expand Up @@ -5722,7 +5722,7 @@ PostgresMain(int argc, char *argv[],
if (resgroupInfoLen > 0)
resgroupInfoBuf = pq_getmsgbytes(&input_message, resgroupInfoLen);

/* in-process variable for temporary namespace */
/* process local variables for temporary namespace */
{
Oid tempNamespaceId, tempToastNamespaceId;

Expand Down
2 changes: 1 addition & 1 deletion src/include/access/heaptoast.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@
*/
extern HeapTuple heap_toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup, int options);
extern MemTuple memtup_toast_insert_or_update(Relation rel, MemTuple newtup, MemTuple oldtup,
MemTupleBinding *pbind, int options);
MemTupleBinding *pbind, int toast_tuple_target, int options);

/* ----------
* heap_toast_delete -
Expand Down
2 changes: 2 additions & 0 deletions src/include/access/reloptions.h
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,8 @@ List* transfromColumnEncodingAocoRootPartition(List *colDefs, List *stenc, List
extern List *transformStorageEncodingClause(List *options, bool validate);
extern List *form_default_storage_directive(List *enc);
extern bool is_storage_encoding_directive(const char *name);
extern bool reloptions_has_opt(List *opts, const char *name);
extern List *build_ao_rel_storage_opts(List *opts, Relation rel);

extern relopt_value *
parseRelOptions(Datum options, bool validate, relopt_kind kind, int *numrelopts);
Expand Down
28 changes: 28 additions & 0 deletions src/test/regress/expected/bfv_temp.out
Original file line number Diff line number Diff line change
Expand Up @@ -99,3 +99,31 @@ drop table tn_b_b;
drop table tn_b_temp;
drop table tn_b_new;
drop function fun(sql text, a oid);
-- Chek if error out inside UDF, myTempNamespace will roll back
\c
create or replace function errored_udf() returns int[] as 'BEGIN RAISE EXCEPTION ''AAA''; END' language plpgsql;
create table n as select from generate_series(1, 10);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named '' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
select count(*) from n n1, n n2; -- boot reader gang
count
-------
100
(1 row)

create temp table nn as select errored_udf() from n;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'errored_udf' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
ERROR: AAA (seg0 slice1 127.0.0.1:7002 pid=123850)
CONTEXT: PL/pgSQL function errored_udf() line 1 at RAISE
create temp table nnn as select * from generate_series(1, 10); -- check if reader do the rollback. should OK
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'generate_series' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
select count(*) from nnn n1, nnn n2; -- check if reader can read temp table. should OK
count
-------
100
(1 row)

drop table n;
drop function errored_udf();
20 changes: 20 additions & 0 deletions src/test/regress/expected/bitmap_index.out
Original file line number Diff line number Diff line change
Expand Up @@ -1088,3 +1088,23 @@ select * from bm_test where b = 1;

-- clean up
drop table bm_test;
-- test the scenario that we need read the same batch words many times
-- more detials can be found at https://github.com/greenplum-db/gpdb/issues/13446
SET enable_seqscan = OFF;
SET enable_bitmapscan = OFF;
create table foo_13446(a int, b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create index idx_13446 on foo_13446 using bitmap(b);
insert into foo_13446 select 1, 1 from generate_series(0, 16384);
-- At current implementation, BMIterateResult can only store 16*1024=16384 TIDs,
-- if we have 13685 TIDs to read, it must scan same batch words twice, that's what we want
select count(*) from foo_13446 where b = 1;
count
-------
16385
(1 row)

drop table foo_13446;
SET enable_seqscan = ON;
SET enable_bitmapscan = ON;
20 changes: 20 additions & 0 deletions src/test/regress/expected/bitmap_index_optimizer.out
Original file line number Diff line number Diff line change
Expand Up @@ -1099,3 +1099,23 @@ select * from bm_test where b = 1;

-- clean up
drop table bm_test;
-- test the scenario that we need read the same batch words many times
-- more detials can be found at https://github.com/greenplum-db/gpdb/issues/13446
SET enable_seqscan = OFF;
SET enable_bitmapscan = OFF;
create table foo_13446(a int, b int);
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'a' as the Greenplum Database data distribution key for this table.
HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen are the optimal data distribution key to minimize skew.
create index idx_13446 on foo_13446 using bitmap(b);
insert into foo_13446 select 1, 1 from generate_series(0, 16384);
-- At current implementation, BMIterateResult can only store 16*1024=16384 TIDs,
-- if we have 13685 TIDs to read, it must scan same batch words twice, that's what we want
select count(*) from foo_13446 where b = 1;
count
-------
16385
(1 row)

drop table foo_13446;
SET enable_seqscan = ON;
SET enable_bitmapscan = ON;
Loading
Loading