Skip to content

Commit

Permalink
Merge pull request #20149 from andrwng/initial-path-provider
Browse files Browse the repository at this point in the history
cloud_storage: add remote path provider
  • Loading branch information
dotnwat authored Jun 27, 2024
2 parents 4cdf78d + 3ac6e49 commit 65f6b39
Show file tree
Hide file tree
Showing 15 changed files with 753 additions and 2 deletions.
4 changes: 4 additions & 0 deletions src/v/cloud_storage/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,11 @@ v_cc_library(
cache_probe.cc
download_exception.cc
partition_manifest.cc
partition_path_utils.cc
recursive_directory_walker.cc
remote.cc
remote_file.cc
remote_path_provider.cc
offset_translation_layer.cc
remote_probe.cc
read_path_probes.cc
Expand All @@ -38,7 +40,9 @@ v_cc_library(
segment_chunk.cc
segment_chunk_api.cc
segment_chunk_data_source.cc
segment_path_utils.cc
topic_manifest.cc
topic_path_utils.cc
async_manifest_view.cc
materialized_manifest_cache.cc
anomalies_detector.cc
Expand Down
1 change: 1 addition & 0 deletions src/v/cloud_storage/fwd.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ class cache;
class partition_recovery_manager;
class remote;
class remote_partition;
class remote_path_provider;
class remote_segment;
class partition_manifest;
class topic_manifest;
Expand Down
3 changes: 3 additions & 0 deletions src/v/cloud_storage/partition_manifest.h
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,9 @@ class partition_manifest : public base_manifest {
}
}

// Returns the fixed file name "manifest.bin".
static ss::sstring filename() {
    return ss::sstring("manifest.bin");
}
// Returns the file name of this manifest. Declared virtual; this
// implementation returns the same value as the static filename().
virtual ss::sstring get_manifest_filename() const {
    ss::sstring name = filename();
    return name;
}

// Returns the remote path of this manifest, i.e. the `second` member of the
// value produced by get_manifest_format_and_path().
remote_manifest_path get_manifest_path() const override {
return get_manifest_format_and_path().second;
}
Expand Down
52 changes: 52 additions & 0 deletions src/v/cloud_storage/partition_path_utils.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// Copyright 2024 Redpanda Data, Inc.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.md
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0
#include "cloud_storage/partition_path_utils.h"

#include "cloud_storage/remote_label.h"
#include "hashing/xx.h"

namespace cloud_storage {

// Builds the partition manifest prefix used when a remote label is present.
// The result has the form "<cluster uuid>/meta/<ntp path>_<revision>".
ss::sstring labeled_partition_manifest_prefix(
  const remote_label& label,
  const model::ntp& ntp,
  model::initial_revision_id rev) {
    const auto ntp_path = ntp.path();
    const auto revision = rev();
    return fmt::format(
      "{}/meta/{}_{}", label.cluster_uuid(), ntp_path, revision);
}

// Builds the full path of a labeled partition manifest by appending
// "/manifest.bin" to the labeled partition manifest prefix.
ss::sstring labeled_partition_manifest_path(
  const remote_label& label,
  const model::ntp& ntp,
  model::initial_revision_id rev) {
    const auto manifest_prefix = labeled_partition_manifest_prefix(
      label, ntp, rev);
    return fmt::format("{}/manifest.bin", manifest_prefix);
}

// Builds the partition manifest prefix used when there is no remote label.
// The result has the form "<hash>/meta/<ntp path>_<revision>", where <hash>
// is printed as eight zero-padded hex digits.
ss::sstring prefixed_partition_manifest_prefix(
  const model::ntp& ntp, model::initial_revision_id rev) {
    // Only the four most significant bits of the 32-bit hash are retained,
    // so every digit after the first one is printed as zero.
    constexpr uint32_t top_bits_mask = 0xF0000000;
    const auto hashed_key = ssx::sformat("{}_{}", ntp.path(), rev());
    const uint32_t full_hash = xxhash_32(hashed_key.data(), hashed_key.size());
    const uint32_t prefix_hash = full_hash & top_bits_mask;
    return fmt::format("{:08x}/meta/{}", prefix_hash, hashed_key);
}

// Builds the path of the "manifest.bin" object located under the
// hash-prefixed partition manifest prefix.
ss::sstring prefixed_partition_manifest_bin_path(
  const model::ntp& ntp, model::initial_revision_id rev) {
    const auto manifest_prefix = prefixed_partition_manifest_prefix(ntp, rev);
    return fmt::format("{}/manifest.bin", manifest_prefix);
}

// Builds the path of the "manifest.json" object located under the
// hash-prefixed partition manifest prefix.
ss::sstring prefixed_partition_manifest_json_path(
  const model::ntp& ntp, model::initial_revision_id rev) {
    const auto manifest_prefix = prefixed_partition_manifest_prefix(ntp, rev);
    return fmt::format("{}/manifest.json", manifest_prefix);
}

} // namespace cloud_storage
44 changes: 44 additions & 0 deletions src/v/cloud_storage/partition_path_utils.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// Copyright 2024 Redpanda Data, Inc.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.md
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0
#pragma once

#include "base/seastarx.h"
#include "cloud_storage/remote_label.h"
#include "model/fundamental.h"
#include "model/metadata.h"

#include <seastar/core/sstring.hh>

namespace cloud_storage {

// Prefix of a partition manifest path for the cluster identified by `label`.
// The prefix is "<cluster uuid>/meta/<ntp path>_<initial revision>", e.g.:
// 806a0f4a-e691-4a2b-9352-ec4b769a5e6e/meta/kafka/panda-topic/0_123
ss::sstring labeled_partition_manifest_prefix(
  const remote_label& label,
  const model::ntp& ntp,
  model::initial_revision_id rev);

// Full path of a partition manifest for the cluster identified by `label`:
// the labeled partition manifest prefix followed by "/manifest.bin", e.g.:
// 806a0f4a-e691-4a2b-9352-ec4b769a5e6e/meta/kafka/panda-topic/0_123/manifest.bin
ss::sstring labeled_partition_manifest_path(
  const remote_label& label,
  const model::ntp& ntp,
  model::initial_revision_id rev);

// Prefix of a partition manifest path when no remote label is used. The
// prefix starts with a hex hash component followed by
// "/meta/<ntp path>_<initial revision>", e.g.:
// a0000000/meta/kafka/panda-topic/0_123
ss::sstring prefixed_partition_manifest_prefix(
const model::ntp& ntp, model::initial_revision_id rev);

// Path of the "manifest.bin" partition manifest when no remote label is
// used, e.g.:
// a0000000/meta/kafka/panda-topic/0_123/manifest.bin
ss::sstring prefixed_partition_manifest_bin_path(
const model::ntp& ntp, model::initial_revision_id rev);

// Path of the "manifest.json" partition manifest when no remote label is
// used, e.g.:
// a0000000/meta/kafka/panda-topic/0_123/manifest.json
ss::sstring prefixed_partition_manifest_json_path(
const model::ntp& ntp, model::initial_revision_id rev);

} // namespace cloud_storage
37 changes: 37 additions & 0 deletions src/v/cloud_storage/remote_label.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// Copyright 2024 Redpanda Data, Inc.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.md
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0
#pragma once

#include "model/fundamental.h"

namespace cloud_storage {

// Label attached to objects written by a cluster so that they can be
// uniquely attributed to that cluster.
struct remote_label
  : serde::envelope<remote_label, serde::version<0>, serde::compat_version<0>> {
    // The cluster UUID of a given cluster. This is critical in avoiding
    // collisions when multiple clusters use the same bucket.
    model::cluster_uuid cluster_uuid{};

    // TODO: add a user-defined label.

    // Creates a label holding a default-initialized cluster UUID.
    remote_label() = default;

    // Creates a label for the cluster with the given UUID.
    explicit remote_label(model::cluster_uuid id)
      : cluster_uuid(id) {}

    // Fields exposed to serde; currently just the cluster UUID.
    auto serde_fields() { return std::tie(cluster_uuid); }

    // Member-wise equality.
    friend bool operator==(const remote_label&, const remote_label&) = default;

    // Prints the label as "{cluster_uuid: <uuid>}".
    friend std::ostream&
    operator<<(std::ostream& os, const remote_label& label) {
        fmt::print(os, "{{cluster_uuid: {}}}", label.cluster_uuid);
        return os;
    }
};

} // namespace cloud_storage
101 changes: 101 additions & 0 deletions src/v/cloud_storage/remote_path_provider.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
// Copyright 2024 Redpanda Data, Inc.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.md
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0
#include "cloud_storage/remote_path_provider.h"

#include "cloud_storage/partition_manifest.h"
#include "cloud_storage/partition_path_utils.h"
#include "cloud_storage/remote_label.h"
#include "cloud_storage/segment_path_utils.h"
#include "cloud_storage/spillover_manifest.h"
#include "cloud_storage/topic_path_utils.h"
#include "cloud_storage/types.h"

namespace cloud_storage {

// Stores the optional label. The path-building methods of this class pick
// the labeled path helpers when a label is present and the hash-prefixed
// helpers otherwise.
remote_path_provider::remote_path_provider(std::optional<remote_label> label)
: label_(label) {}

// Returns the topic manifest prefix for `topic`, using the labeled form when
// a label is set and the hash-prefixed form otherwise.
ss::sstring remote_path_provider::topic_manifest_prefix(
  const model::topic_namespace& topic) const {
    if (!label_.has_value()) {
        return prefixed_topic_manifest_prefix(topic);
    }
    return labeled_topic_manifest_prefix(*label_, topic);
}

// Returns the topic manifest path for `topic`. `rev` is only passed on when a
// label is set; the hash-prefixed helper is called with the topic alone.
ss::sstring remote_path_provider::topic_manifest_path(
  const model::topic_namespace& topic, model::initial_revision_id rev) const {
    if (!label_.has_value()) {
        return prefixed_topic_manifest_bin_path(topic);
    }
    return labeled_topic_manifest_path(*label_, topic, rev);
}

// Returns the partition manifest prefix for the given partition and
// revision, using the labeled form when a label is set and the hash-prefixed
// form otherwise.
ss::sstring remote_path_provider::partition_manifest_prefix(
  const model::ntp& ntp, model::initial_revision_id rev) const {
    if (!label_.has_value()) {
        return prefixed_partition_manifest_prefix(ntp, rev);
    }
    return labeled_partition_manifest_prefix(*label_, ntp, rev);
}

// Returns "<partition manifest prefix>/<file name>" for `manifest`, where the
// file name is whatever the manifest reports via get_manifest_filename().
ss::sstring remote_path_provider::partition_manifest_path(
  const partition_manifest& manifest) const {
    const auto manifest_prefix = partition_manifest_prefix(
      manifest.get_ntp(), manifest.get_revision_id());
    const auto manifest_name = manifest.get_manifest_filename();
    return fmt::format("{}/{}", manifest_prefix, manifest_name);
}

// Returns "<partition manifest prefix>/<file name>" for the given partition
// and revision, where the file name is partition_manifest::filename().
ss::sstring remote_path_provider::partition_manifest_path(
  const model::ntp& ntp, model::initial_revision_id rev) const {
    const auto manifest_prefix = partition_manifest_prefix(ntp, rev);
    const auto manifest_name = partition_manifest::filename();
    return fmt::format("{}/{}", manifest_prefix, manifest_name);
}

// Returns the JSON partition manifest path when no label is set, and
// std::nullopt when a label is set.
std::optional<ss::sstring> remote_path_provider::partition_manifest_path_json(
  const model::ntp& ntp, model::initial_revision_id rev) const {
    if (!label_.has_value()) {
        return prefixed_partition_manifest_json_path(ntp, rev);
    }
    return std::nullopt;
}

// Returns "<partition manifest prefix>/<spillover file name>". The prefix is
// derived from the NTP and revision of `stm_manifest`; the file name is
// produced by spillover_manifest::filename(c).
ss::sstring remote_path_provider::spillover_manifest_path(
  const partition_manifest& stm_manifest,
  const spillover_manifest_path_components& c) const {
    const auto manifest_prefix = partition_manifest_prefix(
      stm_manifest.get_ntp(), stm_manifest.get_revision_id());
    const auto spillover_name = spillover_manifest::filename(c);
    return fmt::format("{}/{}", manifest_prefix, spillover_name);
}

// Returns the remote path of `segment` for the given partition and revision.
// The remote segment name is generated from the segment metadata, then
// combined with the archiver term by either the labeled or the hash-prefixed
// path helper, depending on whether a label is set.
ss::sstring remote_path_provider::segment_path(
  const model::ntp& ntp,
  model::initial_revision_id rev,
  const segment_meta& segment) const {
    const auto remote_name = partition_manifest::generate_remote_segment_name(
      segment);
    if (!label_.has_value()) {
        return prefixed_segment_path(
          ntp, rev, remote_name, segment.archiver_term);
    }
    return labeled_segment_path(
      *label_, ntp, rev, remote_name, segment.archiver_term);
}

// Convenience overload: takes the NTP and revision from `manifest` and
// forwards to the (ntp, rev, segment) overload.
ss::sstring remote_path_provider::segment_path(
  const partition_manifest& manifest, const segment_meta& segment) const {
    const auto& manifest_ntp = manifest.get_ntp();
    const auto manifest_rev = manifest.get_revision_id();
    return segment_path(manifest_ntp, manifest_rev, segment);
}

} // namespace cloud_storage
75 changes: 75 additions & 0 deletions src/v/cloud_storage/remote_path_provider.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
// Copyright 2024 Redpanda Data, Inc.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.md
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0
#pragma once

#include "cloud_storage/fwd.h"
#include "cloud_storage/remote_label.h"
#include "cloud_storage/types.h"
#include "model/fundamental.h"
#include "model/metadata.h"

#include <seastar/core/sstring.hh>

#include <optional>

namespace cloud_storage {

// Builds the object-store paths of manifests and segments. Whether the
// labeled or the hash-prefixed path scheme is used depends on whether a
// remote label was supplied at construction.
class remote_path_provider {
public:
    // `label`: the remote label to build paths with, or std::nullopt to use
    // the hash-prefixed paths.
    explicit remote_path_provider(std::optional<remote_label> label);

    // Topic manifest prefix. Can be used as a filter when listing objects to
    // find topic manifests.
    ss::sstring
    topic_manifest_prefix(const model::topic_namespace& topic) const;

    // Path of the topic manifest.
    ss::sstring topic_manifest_path(
      const model::topic_namespace& topic,
      model::initial_revision_id rev) const;

    // Partition manifest prefix. Can be used as a filter when listing
    // objects to find partition or spillover manifests.
    ss::sstring partition_manifest_prefix(
      const model::ntp& ntp, model::initial_revision_id rev) const;

    // Path of a partition manifest, built from the partition manifest file
    // name. The result is meant to be used as the path of the STM manifest
    // (i.e. not of a spillover manifest).
    ss::sstring partition_manifest_path(
      const model::ntp& ntp, model::initial_revision_id rev) const;

    // Path of the given partition manifest.
    // NOTE: subclasses of partition manifest, e.g. spillover manifests, are
    // accepted as well.
    ss::sstring
    partition_manifest_path(const partition_manifest& manifest) const;

    // JSON path of a partition manifest, if supported by `label_`.
    // When a label is supplied, partition manifests are not expected to be
    // written as JSON, and std::nullopt is returned.
    std::optional<ss::sstring> partition_manifest_path_json(
      const model::ntp& ntp, model::initial_revision_id rev) const;

    // Path of the spillover manifest described by `c`, located under the
    // partition manifest prefix of `stm_manifest`.
    ss::sstring spillover_manifest_path(
      const partition_manifest& stm_manifest,
      const spillover_manifest_path_components& c) const;

    // Path of a segment, identified either through the manifest that owns it
    // or through an explicit NTP and revision.
    ss::sstring segment_path(
      const partition_manifest& manifest, const segment_meta& segment) const;
    ss::sstring segment_path(
      const model::ntp& ntp,
      model::initial_revision_id rev,
      const segment_meta& segment) const;

private:
    // Label used to build paths; empty selects the hash-prefixed paths.
    std::optional<remote_label> label_;
};

} // namespace cloud_storage
Loading

0 comments on commit 65f6b39

Please sign in to comment.