Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Flash-600] PageStorage GC only remove obsoleted data #297

Merged
merged 5 commits into from
Oct 30, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions dbms/src/Storages/Page/PageEntries.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ class PageEntriesMixin
*/
void put(PageId page_id, const PageEntry & entry);

void move_normal_page(PageId page_id, PageEntry entry);

/** Delete RefPage{page_id} and decrease corresponding Page ref-count.
* if origin Page ref-count down to 0, the Page is erased from entry map
* template must_exist = true ensure that corresponding Page must exist.
Expand Down Expand Up @@ -230,6 +232,30 @@ void PageEntriesMixin<T>::put(PageId page_id, const PageEntry & entry)
max_page_id = std::max(max_page_id, page_id);
}

template <typename T>
void PageEntriesMixin<T>::move_normal_page(PageId normal_page_id, PageEntry entry)
{
assert(is_base); // can only call by base

// update normal page's entry
auto ori_iter = normal_pages.find(normal_page_id);
if (likely(ori_iter != normal_pages.end()))
{
// replace ori Page{normal_page_id}'s entry but inherit ref-counting
const UInt32 page_ref_count = ori_iter->second.ref;
entry.ref = page_ref_count;
normal_pages[normal_page_id] = entry;
}
else
{
// Page{normal_page_id} not exist
throw Exception("Try to move non-exist normal page: " + DB::toString(normal_page_id), ErrorCodes::LOGICAL_ERROR);
}

// update max_page_id
max_page_id = std::max(max_page_id, normal_page_id);
}

template <typename T>
template <bool must_exist>
void PageEntriesMixin<T>::del(PageId page_id)
Expand Down
130 changes: 107 additions & 23 deletions dbms/src/Storages/Page/PageFile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ std::pair<ByteBuffer, ByteBuffer> genWriteData( //
switch (write.type)
{
case WriteBatch::WriteType::PUT:
case WriteBatch::WriteType::MOVE_NORMAL_PAGE:
data_write_bytes += write.size;
meta_write_bytes += PAGE_META_SIZE;
break;
Expand Down Expand Up @@ -93,6 +94,7 @@ std::pair<ByteBuffer, ByteBuffer> genWriteData( //
switch (write.type)
{
case WriteBatch::WriteType::PUT:
case WriteBatch::WriteType::MOVE_NORMAL_PAGE:
{
write.read_buffer->readStrict(data_pos, write.size);
Checksum page_checksum = CityHash_v1_0_2::CityHash64(data_pos, write.size);
Expand All @@ -106,7 +108,10 @@ std::pair<ByteBuffer, ByteBuffer> genWriteData( //
pc.offset = page_data_file_off;
pc.checksum = page_checksum;

edit.put(write.page_id, pc);
if (write.type == WriteBatch::WriteType::PUT)
edit.put(write.page_id, pc);
else if (write.type == WriteBatch::WriteType::MOVE_NORMAL_PAGE)
edit.moveNormalPage(write.page_id, pc);

PageUtil::put(meta_pos, (PageId)write.page_id);
PageUtil::put(meta_pos, (PageTag)write.tag);
Expand Down Expand Up @@ -191,6 +196,7 @@ std::pair<UInt64, UInt64> analyzeMetaFile( //
switch (write_type)
{
case WriteBatch::WriteType::PUT:
case WriteBatch::WriteType::MOVE_NORMAL_PAGE:
{
auto page_id = PageUtil::get<PageId>(pos);
PageEntry pc;
Expand All @@ -201,7 +207,11 @@ std::pair<UInt64, UInt64> analyzeMetaFile( //
pc.size = PageUtil::get<PageSize>(pos);
pc.checksum = PageUtil::get<Checksum>(pos);

edit.put(page_id, pc);
if (write_type == WriteBatch::WriteType::PUT)
edit.put(page_id, pc);
else if (write_type == WriteBatch::WriteType::MOVE_NORMAL_PAGE)
edit.moveNormalPage(page_id, pc);

page_data_file_size += pc.size;
break;
}
Expand Down Expand Up @@ -281,7 +291,10 @@ void PageFile::Writer::write(const WriteBatch & wb, PageEntriesEdit & edit)
// PageFile::Reader
// =========================================================

PageFile::Reader::Reader(PageFile & page_file) : data_file_path(page_file.dataPath()), data_file_fd(PageUtil::openFile<true>(data_file_path)) {}
PageFile::Reader::Reader(PageFile & page_file)
: data_file_path(page_file.dataPath()), data_file_fd(PageUtil::openFile<true>(data_file_path))
{
}

PageFile::Reader::~Reader()
{
Expand Down Expand Up @@ -402,7 +415,13 @@ void PageFile::Reader::read(PageIdAndEntries & to_read, const PageHandler & hand
const PageFile::Version PageFile::CURRENT_VERSION = 1;

PageFile::PageFile(PageFileId file_id_, UInt32 level_, const std::string & parent_path, bool is_tmp_, bool is_create, Logger * log_)
: file_id(file_id_), level(level_), is_tmp(is_tmp_), parent_path(parent_path), data_file_pos(0), meta_file_pos(0), log(log_)
: file_id(file_id_),
level(level_),
type(is_tmp_ ? Type::Temp : Type::Formal),
parent_path(parent_path),
data_file_pos(0),
meta_file_pos(0),
log(log_)
{
if (is_create)
{
Expand All @@ -413,42 +432,62 @@ PageFile::PageFile(PageFileId file_id_, UInt32 level_, const std::string & paren
}
}

std::pair<PageFile, bool> PageFile::recover(const std::string & parent_path, const std::string & page_file_name, Logger * log)
std::pair<PageFile, PageFile::Type> PageFile::recover(const String & parent_path, const String & page_file_name, Logger * log)
{

if (!startsWith(page_file_name, ".tmp.page_") && !startsWith(page_file_name, "page_"))
if (!startsWith(page_file_name, folder_prefix_formal) && !startsWith(page_file_name, folder_prefix_temp)
&& !startsWith(page_file_name, folder_prefix_legacy))
{
LOG_INFO(log, "Not page file, ignored " + page_file_name);
return {{}, false};
return {{}, Type::Invalid};
}
std::vector<std::string> ss;
boost::split(ss, page_file_name, boost::is_any_of("_"));
if (ss.size() != 3)
{
LOG_INFO(log, "Unrecognized file, ignored: " + page_file_name);
return {{}, false};
return {{}, Type::Invalid};
}
if (ss[0] == ".tmp.page")

PageFileId file_id = std::stoull(ss[1]);
UInt32 level = std::stoi(ss[2]);
PageFile pf(file_id, level, parent_path, /* is_temp */ false, /* is_create */ false, log);
if (ss[0] == folder_prefix_temp)
{
LOG_INFO(log, "Temporary page file, ignored: " + page_file_name);
return {{}, false};
return {{}, Type::Temp};
}
// ensure both meta && data exist
PageFileId file_id = std::stoull(ss[1]);
UInt32 level = std::stoi(ss[2]);
PageFile pf(file_id, level, parent_path, false, false, log);
if (!Poco::File(pf.metaPath()).exists())
else if (ss[0] == folder_prefix_legacy)
{
LOG_INFO(log, "Broken page without meta file, ignored: " + pf.metaPath());
return {{}, false};
pf.type = Type::Legacy;
// ensure meta exist
if (!Poco::File(pf.metaPath()).exists())
{
LOG_INFO(log, "Broken page without meta file, ignored: " + pf.metaPath());
return {{}, Type::Invalid};
}

return {pf, Type::Legacy};
}
if (!Poco::File(pf.dataPath()).exists())
else if (ss[0] == folder_prefix_formal)
{
LOG_INFO(log, "Broken page without data file, ignored: " + pf.dataPath());
return {{}, false};
// ensure both meta && data exist
if (!Poco::File(pf.metaPath()).exists())
{
LOG_INFO(log, "Broken page without meta file, ignored: " + pf.metaPath());
return {{}, Type::Invalid};
}

if (!Poco::File(pf.dataPath()).exists())
{
LOG_INFO(log, "Broken page without data file, ignored: " + pf.dataPath());
return {{}, Type::Invalid};
}
return {pf, Type::Formal};
}

return {pf, true};
LOG_INFO(log, "Unrecognized file prefix, ignored: " + page_file_name);
return {{}, Type::Invalid};
}

PageFile PageFile::newPageFile(PageFileId file_id, UInt32 level, const std::string & parent_path, bool is_tmp, Logger * log)
Expand Down Expand Up @@ -483,13 +522,36 @@ void PageFile::readAndSetPageMetas(PageEntriesEdit & edit)

void PageFile::setFormal()
{
if (!is_tmp)
if (type != Type::Temp)
return;
Poco::File file(folderPath());
is_tmp = false;
type = Type::Formal;
file.renameTo(folderPath());
}

void PageFile::setLegacy()
{
if (type != Type::Formal)
return;
// rename to legacy dir
Poco::File formal_dir(folderPath());
type = Type::Legacy;
formal_dir.renameTo(folderPath());
// remove the data part
if (auto data_file = Poco::File(dataPath()); data_file.exists())
{
data_file.remove();
}
}

void PageFile::removeDataIfExists() const
{
if (auto data_file = Poco::File(dataPath()); data_file.exists())
{
data_file.remove();
}
}

void PageFile::destroy() const
{
// TODO: delay remove.
Expand Down Expand Up @@ -526,4 +588,26 @@ UInt64 PageFile::getDataFileSize() const
return file.getSize();
}

String PageFile::folderPath() const
{
String path = parent_path + "/";
switch (type)
{
case Type::Formal:
path += folder_prefix_formal;
break;
case Type::Temp:
path += folder_prefix_temp;
break;
case Type::Legacy:
path += folder_prefix_legacy;
break;

case Type::Invalid:
throw Exception("Try to access folderPath of invalid page file.", ErrorCodes::LOGICAL_ERROR);
}
path += "_" + DB::toString(file_id) + "_" + DB::toString(level);
return path;
}

} // namespace DB
30 changes: 21 additions & 9 deletions dbms/src/Storages/Page/PageFile.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,18 @@ class PageFile : public Allocator<false>
};

public:
enum class Type
{
Invalid = 0,
Formal,
Temp, // written by GC thread
Legacy, // the data is obsoleted and has been removed, only meta left
};

/// Create an empty page file.
PageFile() = default;
/// Recover a page file from disk.
static std::pair<PageFile, bool> recover(const String & parent_path, const String & page_file_name, Poco::Logger * log);
static std::pair<PageFile, Type> recover(const String & parent_path, const String & page_file_name, Poco::Logger * log);
/// Create a new page file.
static PageFile newPageFile(PageFileId file_id, UInt32 level, const String & parent_path, bool is_tmp, Poco::Logger * log);
/// Open an existing page file for read.
Expand All @@ -95,6 +103,8 @@ class PageFile : public Allocator<false>

/// Rename this page file into formal style.
void setFormal();
/// Rename this page file into legacy style and remove data.
void setLegacy();
/// Destroy underlying system files.
void destroy() const;

Expand All @@ -112,23 +122,25 @@ class PageFile : public Allocator<false>
UInt64 getDataFileAppendPos() const { return data_file_pos; }
UInt64 getDataFileSize() const;
bool isExist() const;
void removeDataIfExists() const;

private:
/// Create a new page file.
PageFile(PageFileId file_id_, UInt32 level_, const String & parent_path, bool is_tmp_, bool is_create, Poco::Logger * log);

String folderPath() const
{
return parent_path + "/" + (is_tmp ? ".tmp.page_" : "page_") + DB::toString(file_id) + "_" + DB::toString(level);
}
String folderPath() const;
String dataPath() const { return folderPath() + "/page"; }
String metaPath() const { return folderPath() + "/meta"; }

constexpr static const char * folder_prefix_formal = "page";
constexpr static const char * folder_prefix_temp = ".temp.page";
constexpr static const char * folder_prefix_legacy = "legacy.page";

private:
UInt64 file_id = 0; // Valid id start from 1.
UInt32 level = 0; // 0: normal, >= 1: generated by GC.
bool is_tmp = false; // true if currently writen by GC thread.
String parent_path{}; // The parent folder of this page file.
UInt64 file_id = 0; // Valid id start from 1.
UInt32 level = 0; // 0: normal, >= 1: generated by GC.
Type type = Type::Formal;
String parent_path{}; // The parent folder of this page file.

// The append pos.
UInt64 data_file_pos = 0;
Expand Down
Loading