Skip to content

Commit

Permalink
[Flash-600] PageStorage GC only remove obsoleted data (pingcap#297)
Browse files Browse the repository at this point in the history
* Add test case for migrate ref-page to ref-page

* check if ref-page is valid

* only remove PageFile's data

* Add MOVE_NORMAL_PAGE for only move normal page

* enhance utils_get_valid_pages
  • Loading branch information
JaySon-Huang committed Oct 30, 2019
1 parent bd3a972 commit a02b179
Show file tree
Hide file tree
Showing 17 changed files with 361 additions and 143 deletions.
26 changes: 26 additions & 0 deletions dbms/src/Storages/Page/PageEntries.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ class PageEntriesMixin
*/
void put(PageId page_id, const PageEntry & entry);

void move_normal_page(PageId page_id, PageEntry entry);

/** Delete RefPage{page_id} and decrease corresponding Page ref-count.
* if origin Page ref-count down to 0, the Page is erased from entry map
* template must_exist = true ensure that corresponding Page must exist.
Expand Down Expand Up @@ -230,6 +232,30 @@ void PageEntriesMixin<T>::put(PageId page_id, const PageEntry & entry)
max_page_id = std::max(max_page_id, page_id);
}

template <typename T>
void PageEntriesMixin<T>::move_normal_page(PageId normal_page_id, PageEntry entry)
{
assert(is_base); // can only call by base

// update normal page's entry
auto ori_iter = normal_pages.find(normal_page_id);
if (likely(ori_iter != normal_pages.end()))
{
// replace ori Page{normal_page_id}'s entry but inherit ref-counting
const UInt32 page_ref_count = ori_iter->second.ref;
entry.ref = page_ref_count;
normal_pages[normal_page_id] = entry;
}
else
{
// Page{normal_page_id} not exist
throw Exception("Try to move non-exist normal page: " + DB::toString(normal_page_id), ErrorCodes::LOGICAL_ERROR);
}

// update max_page_id
max_page_id = std::max(max_page_id, normal_page_id);
}

template <typename T>
template <bool must_exist>
void PageEntriesMixin<T>::del(PageId page_id)
Expand Down
125 changes: 103 additions & 22 deletions dbms/src/Storages/Page/PageFile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ std::pair<ByteBuffer, ByteBuffer> genWriteData( //
switch (write.type)
{
case WriteBatch::WriteType::PUT:
case WriteBatch::WriteType::MOVE_NORMAL_PAGE:
data_write_bytes += write.size;
meta_write_bytes += PAGE_META_SIZE;
break;
Expand Down Expand Up @@ -93,6 +94,7 @@ std::pair<ByteBuffer, ByteBuffer> genWriteData( //
switch (write.type)
{
case WriteBatch::WriteType::PUT:
case WriteBatch::WriteType::MOVE_NORMAL_PAGE:
{
write.read_buffer->readStrict(data_pos, write.size);
Checksum page_checksum = CityHash_v1_0_2::CityHash64(data_pos, write.size);
Expand All @@ -106,7 +108,10 @@ std::pair<ByteBuffer, ByteBuffer> genWriteData( //
pc.offset = page_data_file_off;
pc.checksum = page_checksum;

edit.put(write.page_id, pc);
if (write.type == WriteBatch::WriteType::PUT)
edit.put(write.page_id, pc);
else if (write.type == WriteBatch::WriteType::MOVE_NORMAL_PAGE)
edit.moveNormalPage(write.page_id, pc);

PageUtil::put(meta_pos, (PageId)write.page_id);
PageUtil::put(meta_pos, (PageTag)write.tag);
Expand Down Expand Up @@ -191,6 +196,7 @@ std::pair<UInt64, UInt64> analyzeMetaFile( //
switch (write_type)
{
case WriteBatch::WriteType::PUT:
case WriteBatch::WriteType::MOVE_NORMAL_PAGE:
{
auto page_id = PageUtil::get<PageId>(pos);
PageEntry pc;
Expand All @@ -201,7 +207,11 @@ std::pair<UInt64, UInt64> analyzeMetaFile( //
pc.size = PageUtil::get<PageSize>(pos);
pc.checksum = PageUtil::get<Checksum>(pos);

edit.put(page_id, pc);
if (write_type == WriteBatch::WriteType::PUT)
edit.put(page_id, pc);
else if (write_type == WriteBatch::WriteType::MOVE_NORMAL_PAGE)
edit.moveNormalPage(page_id, pc);

page_data_file_size += pc.size;
break;
}
Expand Down Expand Up @@ -405,7 +415,13 @@ void PageFile::Reader::read(PageIdAndEntries & to_read, const PageHandler & hand
const PageFile::Version PageFile::CURRENT_VERSION = 1;

PageFile::PageFile(PageFileId file_id_, UInt32 level_, const std::string & parent_path, bool is_tmp_, bool is_create, Logger * log_)
: file_id(file_id_), level(level_), is_tmp(is_tmp_), parent_path(parent_path), data_file_pos(0), meta_file_pos(0), log(log_)
: file_id(file_id_),
level(level_),
type(is_tmp_ ? Type::Temp : Type::Formal),
parent_path(parent_path),
data_file_pos(0),
meta_file_pos(0),
log(log_)
{
if (is_create)
{
Expand All @@ -416,42 +432,62 @@ PageFile::PageFile(PageFileId file_id_, UInt32 level_, const std::string & paren
}
}

std::pair<PageFile, bool> PageFile::recover(const std::string & parent_path, const std::string & page_file_name, Logger * log)
std::pair<PageFile, PageFile::Type> PageFile::recover(const String & parent_path, const String & page_file_name, Logger * log)
{

if (!startsWith(page_file_name, ".tmp.page_") && !startsWith(page_file_name, "page_"))
if (!startsWith(page_file_name, folder_prefix_formal) && !startsWith(page_file_name, folder_prefix_temp)
&& !startsWith(page_file_name, folder_prefix_legacy))
{
LOG_INFO(log, "Not page file, ignored " + page_file_name);
return {{}, false};
return {{}, Type::Invalid};
}
std::vector<std::string> ss;
boost::split(ss, page_file_name, boost::is_any_of("_"));
if (ss.size() != 3)
{
LOG_INFO(log, "Unrecognized file, ignored: " + page_file_name);
return {{}, false};
return {{}, Type::Invalid};
}
if (ss[0] == ".tmp.page")

PageFileId file_id = std::stoull(ss[1]);
UInt32 level = std::stoi(ss[2]);
PageFile pf(file_id, level, parent_path, /* is_temp */ false, /* is_create */ false, log);
if (ss[0] == folder_prefix_temp)
{
LOG_INFO(log, "Temporary page file, ignored: " + page_file_name);
return {{}, false};
return {{}, Type::Temp};
}
// ensure both meta && data exist
PageFileId file_id = std::stoull(ss[1]);
UInt32 level = std::stoi(ss[2]);
PageFile pf(file_id, level, parent_path, false, false, log);
if (!Poco::File(pf.metaPath()).exists())
else if (ss[0] == folder_prefix_legacy)
{
LOG_INFO(log, "Broken page without meta file, ignored: " + pf.metaPath());
return {{}, false};
pf.type = Type::Legacy;
// ensure meta exist
if (!Poco::File(pf.metaPath()).exists())
{
LOG_INFO(log, "Broken page without meta file, ignored: " + pf.metaPath());
return {{}, Type::Invalid};
}

return {pf, Type::Legacy};
}
if (!Poco::File(pf.dataPath()).exists())
else if (ss[0] == folder_prefix_formal)
{
LOG_INFO(log, "Broken page without data file, ignored: " + pf.dataPath());
return {{}, false};
// ensure both meta && data exist
if (!Poco::File(pf.metaPath()).exists())
{
LOG_INFO(log, "Broken page without meta file, ignored: " + pf.metaPath());
return {{}, Type::Invalid};
}

if (!Poco::File(pf.dataPath()).exists())
{
LOG_INFO(log, "Broken page without data file, ignored: " + pf.dataPath());
return {{}, Type::Invalid};
}
return {pf, Type::Formal};
}

return {pf, true};
LOG_INFO(log, "Unrecognized file prefix, ignored: " + page_file_name);
return {{}, Type::Invalid};
}

PageFile PageFile::newPageFile(PageFileId file_id, UInt32 level, const std::string & parent_path, bool is_tmp, Logger * log)
Expand Down Expand Up @@ -486,13 +522,36 @@ void PageFile::readAndSetPageMetas(PageEntriesEdit & edit)

void PageFile::setFormal()
{
if (!is_tmp)
if (type != Type::Temp)
return;
Poco::File file(folderPath());
is_tmp = false;
type = Type::Formal;
file.renameTo(folderPath());
}

void PageFile::setLegacy()
{
if (type != Type::Formal)
return;
// rename to legacy dir
Poco::File formal_dir(folderPath());
type = Type::Legacy;
formal_dir.renameTo(folderPath());
// remove the data part
if (auto data_file = Poco::File(dataPath()); data_file.exists())
{
data_file.remove();
}
}

void PageFile::removeDataIfExists() const
{
if (auto data_file = Poco::File(dataPath()); data_file.exists())
{
data_file.remove();
}
}

void PageFile::destroy() const
{
// TODO: delay remove.
Expand Down Expand Up @@ -529,4 +588,26 @@ UInt64 PageFile::getDataFileSize() const
return file.getSize();
}

String PageFile::folderPath() const
{
String path = parent_path + "/";
switch (type)
{
case Type::Formal:
path += folder_prefix_formal;
break;
case Type::Temp:
path += folder_prefix_temp;
break;
case Type::Legacy:
path += folder_prefix_legacy;
break;

case Type::Invalid:
throw Exception("Try to access folderPath of invalid page file.", ErrorCodes::LOGICAL_ERROR);
}
path += "_" + DB::toString(file_id) + "_" + DB::toString(level);
return path;
}

} // namespace DB
30 changes: 21 additions & 9 deletions dbms/src/Storages/Page/PageFile.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,18 @@ class PageFile : public Allocator<false>
};

public:
enum class Type
{
Invalid = 0,
Formal,
Temp, // written by GC thread
Legacy, // the data is obsoleted and has been removed, only meta left
};

/// Create an empty page file.
PageFile() = default;
/// Recover a page file from disk.
static std::pair<PageFile, bool> recover(const String & parent_path, const String & page_file_name, Poco::Logger * log);
static std::pair<PageFile, Type> recover(const String & parent_path, const String & page_file_name, Poco::Logger * log);
/// Create a new page file.
static PageFile newPageFile(PageFileId file_id, UInt32 level, const String & parent_path, bool is_tmp, Poco::Logger * log);
/// Open an existing page file for read.
Expand All @@ -95,6 +103,8 @@ class PageFile : public Allocator<false>

/// Rename this page file into formal style.
void setFormal();
/// Rename this page file into legacy style and remove data.
void setLegacy();
/// Destroy underlying system files.
void destroy() const;

Expand All @@ -112,23 +122,25 @@ class PageFile : public Allocator<false>
UInt64 getDataFileAppendPos() const { return data_file_pos; }
UInt64 getDataFileSize() const;
bool isExist() const;
void removeDataIfExists() const;

private:
/// Create a new page file.
PageFile(PageFileId file_id_, UInt32 level_, const String & parent_path, bool is_tmp_, bool is_create, Poco::Logger * log);

String folderPath() const
{
return parent_path + "/" + (is_tmp ? ".tmp.page_" : "page_") + DB::toString(file_id) + "_" + DB::toString(level);
}
String folderPath() const;
String dataPath() const { return folderPath() + "/page"; }
String metaPath() const { return folderPath() + "/meta"; }

constexpr static const char * folder_prefix_formal = "page";
constexpr static const char * folder_prefix_temp = ".temp.page";
constexpr static const char * folder_prefix_legacy = "legacy.page";

private:
UInt64 file_id = 0; // Valid id start from 1.
UInt32 level = 0; // 0: normal, >= 1: generated by GC.
bool is_tmp = false; // true if currently writen by GC thread.
String parent_path{}; // The parent folder of this page file.
UInt64 file_id = 0; // Valid id start from 1.
UInt32 level = 0; // 0: normal, >= 1: generated by GC.
Type type = Type::Formal;
String parent_path{}; // The parent folder of this page file.

// The append pos.
UInt64 data_file_pos = 0;
Expand Down
Loading

0 comments on commit a02b179

Please sign in to comment.