Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Additional GCP authentication #3541

Merged
merged 4 commits into from
Jan 25, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
224 changes: 222 additions & 2 deletions object_store/src/gcp/credential.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,26 @@
use crate::client::retry::RetryExt;
use crate::client::token::TemporaryToken;
use crate::RetryConfig;
use async_trait::async_trait;
use base64::prelude::BASE64_URL_SAFE_NO_PAD;
use base64::Engine;
use reqwest::{Client, Method};
use ring::signature::RsaKeyPair;
use snafu::{ResultExt, Snafu};
use std::env;
use std::fs::File;
use std::io::BufReader;
use std::path::Path;
use std::time::{Duration, Instant};

#[derive(Debug, Snafu)]
pub enum Error {
#[snafu(display("Unable to open service account file: {}", source))]
OpenCredentials { source: std::io::Error },

#[snafu(display("Unable to decode service account file: {}", source))]
DecodeCredentials { source: serde_json::Error },

#[snafu(display("No RSA key found in pem file"))]
MissingKey,

Expand Down Expand Up @@ -104,6 +115,15 @@ struct TokenResponse {
expires_in: u64,
}

#[async_trait]
pub(crate) trait TokenProvider: std::fmt::Debug + Send + Sync {
winding-lines marked this conversation as resolved.
Show resolved Hide resolved
async fn fetch_token(
&self,
client: &Client,
retry: &RetryConfig,
) -> Result<TemporaryToken<String>>;
}

/// Encapsulates the logic to perform an OAuth token challenge
#[derive(Debug)]
pub struct OAuthProvider {
Expand Down Expand Up @@ -138,9 +158,12 @@ impl OAuthProvider {
random: ring::rand::SystemRandom::new(),
})
}
}

#[async_trait]
impl TokenProvider for OAuthProvider {
winding-lines marked this conversation as resolved.
Show resolved Hide resolved
/// Fetch a fresh token
pub async fn fetch_token(
async fn fetch_token(
&self,
client: &Client,
retry: &RetryConfig,
Expand Down Expand Up @@ -195,6 +218,88 @@ impl OAuthProvider {
}
}

fn reader_credentials_file<T>(
winding-lines marked this conversation as resolved.
Show resolved Hide resolved
service_account_path: impl AsRef<std::path::Path>,
) -> Result<T>
where
T: serde::de::DeserializeOwned,
{
let file = File::open(service_account_path).context(OpenCredentialsSnafu)?;
let reader = BufReader::new(file);
serde_json::from_reader(reader).context(DecodeCredentialsSnafu)
}

/// A deserialized `service-account-********.json`-file.
#[derive(serde::Deserialize, Debug)]
pub(crate) struct ServiceAccountCredentials {
/// The private key in RSA format.
pub private_key: String,

/// The email address associated with the service account.
pub client_email: String,

/// Base URL for GCS
#[serde(default = "default_gcs_base_url")]
pub gcs_base_url: String,

/// Disable oauth and use empty tokens.
#[serde(default = "default_disable_oauth")]
pub disable_oauth: bool,
}

pub(crate) fn default_gcs_base_url() -> String {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I appreciate these were just copied but they could be constants instead of functions FWIW

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think these are used to enable some integration testing and as such will never be in the json file in normal operation. According to the serde documentation they need to be functions... https://serde.rs/field-attrs.html#default--path

"https://storage.googleapis.com".to_owned()
}

pub(crate) fn default_disable_oauth() -> bool {
false
}

impl ServiceAccountCredentials {
/// Create a new [`ServiceAccountCredentials`] from a file.
pub fn from_file<P: AsRef<std::path::Path>>(path: P) -> Result<Self> {
reader_credentials_file(path)
}

/// Create a new [`ServiceAccountCredentials`] from a string.
pub fn from_key(key: &str) -> Result<Self> {
serde_json::from_str(key).context(DecodeCredentialsSnafu)
}

/// Create an [`OAuthProvider`] from this credentials struct.
pub fn token_provider(
self,
scope: &str,
audience: &str,
) -> Result<Box<dyn TokenProvider>> {
Ok(Box::new(OAuthProvider::new(
self.client_email,
self.private_key,
scope.to_string(),
audience.to_string(),
)?) as Box<dyn TokenProvider>)
}
}

/// A no-op provider that returns empty tokens
#[derive(Debug)]
pub struct NoOpProvider;

#[async_trait]
impl TokenProvider for NoOpProvider {
/// Fetch a fresh token
async fn fetch_token(
&self,
_client: &Client,
_retry: &RetryConfig,
) -> Result<TemporaryToken<String>> {
Ok(TemporaryToken {
token: "".to_string(),
expiry: Instant::now(),
})
}
}

winding-lines marked this conversation as resolved.
Show resolved Hide resolved
/// Returns the number of seconds since unix epoch
fn seconds_since_epoch() -> u64 {
std::time::SystemTime::now()
Expand All @@ -205,7 +310,7 @@ fn seconds_since_epoch() -> u64 {

fn decode_first_rsa_key(private_key_pem: String) -> Result<RsaKeyPair> {
use rustls_pemfile::Item;
use std::io::{BufReader, Cursor};
use std::io::Cursor;

let mut cursor = Cursor::new(private_key_pem);
let mut reader = BufReader::new(&mut cursor);
Expand All @@ -222,3 +327,118 @@ fn b64_encode_obj<T: serde::Serialize>(obj: &T) -> Result<String> {
let string = serde_json::to_string(obj).context(EncodeSnafu)?;
Ok(BASE64_URL_SAFE_NO_PAD.encode(string))
}

/// A provider that uses the Google Cloud Platform metadata server to fetch a token.
#[derive(Debug, Default)]
winding-lines marked this conversation as resolved.
Show resolved Hide resolved
pub struct InstanceCredentialProvider {
audience: String,
}

impl InstanceCredentialProvider {
pub fn new<T: Into<String>>(audience: T) -> Self {
Self {
audience: audience.into(),
}
}
}

#[async_trait]
impl TokenProvider for InstanceCredentialProvider {
async fn fetch_token(
&self,
client: &Client,
winding-lines marked this conversation as resolved.
Show resolved Hide resolved
retry: &RetryConfig,
) -> Result<TemporaryToken<String>> {
println!("fetching token from metadata server");
winding-lines marked this conversation as resolved.
Show resolved Hide resolved
const TOKEN_URL: &str =
"http://metadata/computeMetadata/v1/instance/service-accounts/default/token";
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
"http://metadata/computeMetadata/v1/instance/service-accounts/default/token";
"http://metadata.google.internal/computeMetadata/v1/instance/service-accounts/default/token";

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Google's documentation specifically has only the hostname without the google.internal domain. I was able to test with the hostname only. I see the value of using the full domain here but I prefer to stick with the documenation.

Copy link
Contributor

@jychen7 jychen7 Jan 24, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

understood, I think Google just have inconsistent documents. Both work for me.

ps: I was looking at https://cloud.google.com/kubernetes-engine/docs/concepts/workload-identity#metadata_server and it mentions the full hostname metadata.google.internal and IP 169.254.169.254. It also reference https://cloud.google.com/compute/docs/metadata/overview

I agree that https://cloud.google.com/docs/authentication/get-id-token#metadata-server only show metadata

(sorry, I can not close this comment, maybe no permission. Feel free to ignore)

let response: TokenResponse = client
winding-lines marked this conversation as resolved.
Show resolved Hide resolved
.request(Method::GET, TOKEN_URL)
.header("Metadata-Flavor", "Google")
.query(&[("audience", &self.audience)])
.send_retry(retry)
.await
.context(TokenRequestSnafu)?
.json()
.await
.context(TokenResponseBodySnafu)?;
let token = TemporaryToken {
token: response.access_token,
expiry: Instant::now() + Duration::from_secs(response.expires_in),
};
Ok(token)
}
}

/// A deserialized `application_default_credentials.json`-file.
#[derive(serde::Deserialize, Debug)]
winding-lines marked this conversation as resolved.
Show resolved Hide resolved
pub struct ApplicationDefaultCredentials {
client_id: String,
client_secret: String,
refresh_token: String,
#[serde(rename = "type")]
type_: String,
}

impl ApplicationDefaultCredentials {
const DEFAULT_TOKEN_GCP_URI: &'static str =
"https://accounts.google.com/o/oauth2/token";
const CREDENTIALS_PATH: &'static str =
".config/gcloud/application_default_credentials.json";
const EXPECTED_TYPE: &str = "authorized_user";

// Create a new application default credential in the following situations:
// 1. a file is passed in and the type matches.
// 2. without argument if the well-known configuration file is present.
pub fn new(path: Option<&str>) -> Result<Option<Self>, Error> {
if let Some(path) = path {
if let Ok(credentials) = reader_credentials_file::<Self>(path) {
if credentials.type_ == Self::EXPECTED_TYPE {
return Ok(Some(credentials));
}
}
// Other credential mechanisms may be able to use this path.
return Ok(None);
winding-lines marked this conversation as resolved.
Show resolved Hide resolved
}
if let Some(home) = env::var_os("HOME") {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is potentially fragile, but adding an additional dependency is probably not worth it so LGTM

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

dirs look like a good option but you mentioned wanting to keep the dependencies lean :)

let path = Path::new(&home).join(Self::CREDENTIALS_PATH);

// It's expected for this file to not exist unless it has been explicitly configured by the user.
if path.try_exists().unwrap_or(false) {
return reader_credentials_file::<Self>(path).map(Some);
}
}
Ok(None)
}
}

#[async_trait]
impl TokenProvider for ApplicationDefaultCredentials {
async fn fetch_token(
&self,
client: &Client,
retry: &RetryConfig,
) -> Result<TemporaryToken<String>, Error> {
let body = [
("grant_type", "refresh_token"),
("client_id", &self.client_id),
("client_secret", &self.client_secret),
("refresh_token", &self.refresh_token),
];

let response = client
.request(Method::POST, Self::DEFAULT_TOKEN_GCP_URI)
.form(&body)
.send_retry(retry)
.await
.context(TokenRequestSnafu)?
.json::<TokenResponse>()
.await
.context(TokenResponseBodySnafu)?;
let token = TemporaryToken {
token: response.access_token,
expiry: Instant::now() + Duration::from_secs(response.expires_in),
};
Ok(token)
}
}
Loading