Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Correctly truncate VM #9342

Merged
merged 11 commits into from
Nov 14, 2024
Merged
5 changes: 5 additions & 0 deletions libs/pageserver_api/src/record.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@ pub enum NeonWalRecord {
file_path: String,
content: Option<Bytes>,
},
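/// Truncate a visibility map page: zero every map byte after `trunc_byte`
/// and clear the bits at position `trunc_offs` and above within `trunc_byte`.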
TruncateVisibilityMap {
trunc_byte: usize,
trunc_offs: usize,
},
MMeent marked this conversation as resolved.
Show resolved Hide resolved

/// A testing record for unit testing purposes. It supports appending data to an existing image, or clearing it.
#[cfg(feature = "testing")]
Expand Down
7 changes: 5 additions & 2 deletions libs/postgres_ffi/src/pg_constants.rs
Original file line number Diff line number Diff line change
Expand Up @@ -243,8 +243,11 @@ const FSM_LEAF_NODES_PER_PAGE: usize = FSM_NODES_PER_PAGE - FSM_NON_LEAF_NODES_P
pub const SLOTS_PER_FSM_PAGE: u32 = FSM_LEAF_NODES_PER_PAGE as u32;

/* From visibilitymap.c */
pub const VM_HEAPBLOCKS_PER_PAGE: u32 =
(BLCKSZ as usize - SIZEOF_PAGE_HEADER_DATA) as u32 * (8 / 2); // MAPSIZE * (BITS_PER_BYTE / BITS_PER_HEAPBLOCK)

pub const VM_MAPSIZE: usize = BLCKSZ as usize - MAXALIGN_SIZE_OF_PAGE_HEADER_DATA;
pub const VM_BITS_PER_HEAPBLOCK: usize = 2;
pub const VM_HEAPBLOCKS_PER_BYTE: usize = 8 / VM_BITS_PER_HEAPBLOCK;
pub const VM_HEAPBLOCKS_PER_PAGE: usize = VM_MAPSIZE * VM_HEAPBLOCKS_PER_BYTE;
knizhnik marked this conversation as resolved.
Show resolved Hide resolved

/* From origin.c */
pub const REPLICATION_STATE_MAGIC: u32 = 0x1257DADE;
Expand Down
28 changes: 23 additions & 5 deletions pageserver/src/walingest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -621,11 +621,29 @@ impl WalIngest {
forknum: VISIBILITYMAP_FORKNUM,
};

let mut vm_page_no = blkno / pg_constants::VM_HEAPBLOCKS_PER_PAGE;
if blkno % pg_constants::VM_HEAPBLOCKS_PER_PAGE != 0 {
// Tail of last remaining vm page has to be zeroed.
// We are not precise here and instead of digging in VM bitmap format just clear the whole page.
modification.put_rel_page_image_zero(rel, vm_page_no)?;
// Last remaining VM page, the byte within its map, and the bit offset within that byte.
let mut vm_page_no = blkno / (pg_constants::VM_HEAPBLOCKS_PER_PAGE as u32);
let trunc_byte = blkno as usize % pg_constants::VM_HEAPBLOCKS_PER_PAGE
/ pg_constants::VM_HEAPBLOCKS_PER_BYTE;
let trunc_offs = blkno as usize % pg_constants::VM_HEAPBLOCKS_PER_BYTE
* pg_constants::VM_BITS_PER_HEAPBLOCK;

// Unless the new size is exactly at a visibility map page boundary, the
// tail bits in the last remaining map page, representing truncated heap
// blocks, need to be cleared. This is not only tidy, but also necessary
// because we don't get a chance to clear the bits if the heap is extended
// again.
if (trunc_byte != 0 || trunc_offs != 0)
&& self.shard.is_key_local(&rel_block_to_key(rel, vm_page_no))
{
modification.put_rel_wal_record(
rel,
vm_page_no,
NeonWalRecord::TruncateVisibilityMap {
trunc_byte,
trunc_offs,
},
)?;
hlinnaka marked this conversation as resolved.
Show resolved Hide resolved
vm_page_no += 1;
}
let nblocks = get_relsize(modification, rel, ctx).await?;
Expand Down
28 changes: 28 additions & 0 deletions pageserver/src/walredo/apply_neon.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,34 @@ pub(crate) fn apply_in_neon(
} => {
anyhow::bail!("tried to pass postgres wal record to neon WAL redo");
}
//
// Code copied from PostgreSQL `visibilitymap_prepare_truncate` function in `visibilitymap.c`
//
NeonWalRecord::TruncateVisibilityMap {
trunc_byte,
trunc_offs,
} => {
// sanity check that this is modifying the correct relation
let (rel, _) = key.to_rel_block().context("invalid record")?;
assert!(
rel.forknum == VISIBILITYMAP_FORKNUM,
"TruncateVisibilityMap record on unexpected rel {}",
rel
);
let map = &mut page[pg_constants::MAXALIGN_SIZE_OF_PAGE_HEADER_DATA..];
map[*trunc_byte + 1..].fill(0u8);
/*----
* Mask out the unwanted bits of the last remaining byte.
*
* ((1 << 0) - 1) = 00000000
* ((1 << 1) - 1) = 00000001
* ...
* ((1 << 6) - 1) = 00111111
* ((1 << 7) - 1) = 01111111
*----
*/
map[*trunc_byte] &= (1 << *trunc_offs) - 1;
}
NeonWalRecord::ClearVisibilityMapFlags {
new_heap_blkno,
old_heap_blkno,
Expand Down
33 changes: 33 additions & 0 deletions test_runner/regress/test_vm_truncate.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from fixtures.neon_fixtures import NeonEnv


#
# Test that VM is properly truncated
#
def test_vm_truncate(neon_simple_env: NeonEnv):
env = neon_simple_env

endpoint = env.endpoints.create_start("main")
con = endpoint.connect()
cur = con.cursor()
cur.execute("CREATE EXTENSION neon_test_utils")
cur.execute("CREATE EXTENSION pageinspect")

cur.execute(
"create table t(pk integer primary key, counter integer default 0, filler text default repeat('?', 200))"
)
cur.execute("insert into t (pk) values (generate_series(1,1000))")
cur.execute("delete from t where pk>10")
cur.execute("vacuum t") # truncates the relation, including its VM and FSM
# Get the image of the first block of the VM, excluding the page header. It's expected
# to still be in the buffer cache.
# Skip the page header (24 bytes, i.e. 48 characters in its hex representation).
cur.execute("select substr(encode(get_raw_page('t', 'vm', 0), 'hex'), 48)")
knizhnik marked this conversation as resolved.
Show resolved Hide resolved
pg_bitmap = cur.fetchall()[0][0]
# flush shared buffers
cur.execute("SELECT clear_buffer_cache()")
# now download the first block of the VM from the pageserver ...
cur.execute("select substr(encode(get_raw_page('t', 'vm', 0), 'hex'), 48)")
ps_bitmap = cur.fetchall()[0][0]
# and check that the contents of the bitmaps are equal, i.e. the pageserver produces the same VM page as Postgres
assert pg_bitmap == ps_bitmap
Loading