Skip to content
This repository has been archived by the owner on Nov 15, 2023. It is now read-only.

Wire up candidate backing, approval-voting to disputes #3348

Merged
29 commits merged into from
Jul 9, 2021
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
fad7f5a
add a from_backing_statement to SignedDisputeStatement
rphmeier Jun 22, 2021
0befef3
inform dispute coordinator of all backing statements
rphmeier Jun 22, 2021
05bf089
add dispute coordinator message to backing tests
rphmeier Jun 22, 2021
1e1d347
send positive dispute statement with every approval
rphmeier Jun 22, 2021
604b191
issue disputes when encountering invalid candidates.
rphmeier Jun 22, 2021
8554667
try to fix flaky test for CI (passed locally)
rphmeier Jun 23, 2021
2d9234b
guide: keep track of concluded-positive disputes until pruned
rphmeier Jun 23, 2021
b5d2489
guide: block implications
rphmeier Jun 23, 2021
73c478b
guide: new dispute inherent flow
rphmeier Jun 23, 2021
8b201fd
mostly implement recency changes for dispute coordinator
rphmeier Jul 6, 2021
88ac217
add a clock to dispute coordinator
rphmeier Jul 7, 2021
6873516
adjust DB tests
rphmeier Jul 7, 2021
b394e97
fix and add new dispute coordinator tests
rphmeier Jul 7, 2021
4e3f813
provisioner: select disputes
rphmeier Jul 7, 2021
264826b
import all validators' approvals
rphmeier Jul 8, 2021
38cf292
Merge branch 'master' into rh-wire-up-disputes
rphmeier Jul 8, 2021
287c73f
address nit: refactor backing statement submission
rphmeier Jul 8, 2021
87f3204
gracefully handle disconnected dispute coordinator
rphmeier Jul 8, 2021
72f439a
remove `review` comment
rphmeier Jul 8, 2021
1a101b0
Merge branch 'master' into rh-wire-up-disputes
rphmeier Jul 8, 2021
f29b7b7
fix up old_tests
rphmeier Jul 8, 2021
6776c5a
Merge branch 'master' into rh-wire-up-disputes
rphmeier Jul 8, 2021
d56070d
fix approval-voting compilation
rphmeier Jul 8, 2021
6ec43bc
fix backing compilation
rphmeier Jul 8, 2021
4a1297b
Merge branch 'master' into rh-wire-up-disputes
rphmeier Jul 8, 2021
3d830da
use known-leaves in WaitForActivation
rphmeier Jun 23, 2021
c886706
Merge branch 'master' into rh-wire-up-disputes
rphmeier Jul 9, 2021
b2170c3
follow-up test fixing
rphmeier Jul 9, 2021
2229d3c
add back allow(dead_code)
rphmeier Jul 9, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 74 additions & 21 deletions node/core/approval-voting/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ use polkadot_node_subsystem::{
AssignmentCheckError, AssignmentCheckResult, ApprovalCheckError, ApprovalCheckResult,
ApprovalVotingMessage, RuntimeApiMessage, RuntimeApiRequest, ChainApiMessage,
ApprovalDistributionMessage, CandidateValidationMessage,
AvailabilityRecoveryMessage,
AvailabilityRecoveryMessage, DisputeCoordinatorMessage,
},
errors::RecoveryError,
Subsystem, SubsystemContext, SubsystemError, SubsystemResult, SpawnedSubsystem,
Expand All @@ -41,9 +41,10 @@ use polkadot_primitives::v1::{
ValidatorIndex, Hash, SessionIndex, SessionInfo, CandidateHash,
CandidateReceipt, BlockNumber,
ValidatorPair, ValidatorSignature, ValidatorId,
CandidateIndex, GroupIndex, ApprovalVote,
CandidateIndex, GroupIndex, ApprovalVote, DisputeStatement,
ValidDisputeStatementKind,
};
use polkadot_node_primitives::ValidationResult;
use polkadot_node_primitives::{SignedDisputeStatement, ValidationResult};
use polkadot_node_primitives::approval::{
IndirectAssignmentCert, IndirectSignedApprovalVote, DelayTranche, BlockApprovalMeta,
};
Expand Down Expand Up @@ -899,7 +900,12 @@ async fn handle_actions(
metrics,
candidate_hash,
approval_request,
)?.into_iter().map(|v| v.clone()).chain(actions_iter).collect();
).await?
.into_iter()
.map(|v| v.clone())
.chain(actions_iter)
.collect();

actions_iter = next_actions.into_iter();
}
Action::LaunchApproval {
Expand Down Expand Up @@ -2150,8 +2156,12 @@ async fn launch_approval(
(candidate_hash, candidate.descriptor.para_id),
);

// TODO: dispute. Either the merkle trie is bad or the erasure root is.
// https://github.com/paritytech/polkadot/issues/2176
sender.send_message(DisputeCoordinatorMessage::IssueLocalStatement(
session_index,
candidate_hash,
candidate.clone(),
false,
).into()).await;
metrics_guard.take().on_approval_invalid();
}
}
Expand Down Expand Up @@ -2199,7 +2209,7 @@ async fn launch_approval(
sender.send_message(CandidateValidationMessage::ValidateFromExhaustive(
available_data.validation_data,
validation_code,
candidate.descriptor,
candidate.descriptor.clone(),
available_data.pov,
val_tx,
).into()).await;
Expand All @@ -2210,7 +2220,7 @@ async fn launch_approval(
validator_index,
candidate_hash,
),
Ok(Ok(ValidationResult::Valid(_, _))) => {
Ok(Ok(ValidationResult::Valid(commitments, _))) => {
// Validation checked out. Issue an approval command. If the underlying service is unreachable,
// then there isn't anything we can do.

Expand All @@ -2221,11 +2231,28 @@ async fn launch_approval(
"Candidate Valid",
);

let _ = metrics_guard.take();
return ApprovalState::approved(
validator_index,
candidate_hash,
);
let expected_commitments_hash = candidate.commitments_hash;
if commitments.hash() == expected_commitments_hash {
let _ = metrics_guard.take();
return ApprovalState::approved(
validator_index,
candidate_hash,
);
} else {
// Commitments mismatch - issue a dispute.
sender.send_message(DisputeCoordinatorMessage::IssueLocalStatement(
session_index,
candidate_hash,
candidate.clone(),
false,
).into()).await;

metrics_guard.take().on_approval_invalid();
return ApprovalState::failed(
validator_index,
candidate_hash,
);
}
}
Ok(Ok(ValidationResult::Invalid(reason))) => {
tracing::warn!(
Expand All @@ -2236,10 +2263,16 @@ async fn launch_approval(
"Detected invalid candidate as an approval checker.",
);

// TODO: issue dispute, but not for timeouts.
// https://github.com/paritytech/polkadot/issues/2176
metrics_guard.take().on_approval_invalid();
// REVIEW: should timeouts and internal execution errors lead to dispute?
// only some execution errors?
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pepyakin I'd appreciate your input here!

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not being @pepyakin there are a definitely a few variants that I'd expect to trigger a dispute BadParent, PoVHashMismatch,ParaHeadHashMismatch,BadSignature,CodeHashMismatch,InvalidOutputs,CodeDecompressionFailure,PoVDecompressionFailure,BadReturn.

ParamsTooLarge(u64), CodeTooLarge(u64), depends on the limits we set, but these should be checked when submitting i.e. a code update to avoid this upfront.

#[derive(Debug)]
pub enum InvalidCandidate {
    /// Failed to execute.`validate_block`. This includes function panicking.
    ExecutionError(String),
    /// Validation outputs check doesn't pass.
    InvalidOutputs,
    /// Execution timeout.
    Timeout,
    /// Validation input is over the limit.
    ParamsTooLarge(u64),
    /// Code size is over the limit.
    CodeTooLarge(u64),
    /// Code does not decompress correctly.
    CodeDecompressionFailure,
    /// PoV does not decompress correctly.
    PoVDecompressionFailure,
    /// Validation function returned invalid data.
    BadReturn,
    /// Invalid relay chain parent.
    BadParent,
    /// POV hash does not match.
    PoVHashMismatch,
    /// Bad collator signature.
    BadSignature,
    /// Para head hash does not match.
    ParaHeadHashMismatch,
    /// Validation code hash does not match.
    CodeHashMismatch,
}

the bigger question is regarding Timeout as discussed at the retreat which depends on hardware, and other errors, that depend on the particular impl details.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am myself not entirely sure and I would've approached you with the same questions. Here are my thoughts anyway,

In ideal world, internal errors should be extremely rare and only raise due to misconfiguration or hardware malfunction. That implies that such a failure should only happen in isolation. Given that, it would be fair from the protocol perspective to no-show in such a case. We could introduce logic that issues a dispute here. However, a rational even if honest validator will be incentivised to comment out this piece of code. I feel this is the case even if in presence of precedents of refunding such slashes. Nobody wants to snitch on themselves and get potentially slashed.

In the real world there are bugs though. Inclusion of a bad candidate can be really bad under some circumstances. Specifically, we assume that a bad candidate thanks to messaging (XCMP or bridges) taints everything it touches. Reverting a slash seems to be an easier task than reverting arbitrary changes made by a swarm of state machines. Some of those are out of our reach (i.e. bridges to other chains). I am not sure how realistic such an attack is, I don't have a scenario.

It seems that we should pick one of the options here. I am a bit more inclined to make those a slashing event and defer to the governance. Hopefully, the validators will be not so rational and there will be some friction due to the fact that this decision comes from the client devs and this is on by default.

Regarding the timeouts. After we have the PVF prechecking game, then we won't deal with timeouts by compilation. This leaves us only with timeouts by execution. I am not familiar enough with the t_good, t_bad, t_ugly model but I assume now it should solve the issue about difference in hardware and compiler versions. If I understand correctly, they have their own criteria about this and it depends on the exact amount of time spent on validation. This should be easy to get from the validation host.

I am not yet sure what's the best way to deal with this issue in the mean time.

sender.send_message(DisputeCoordinatorMessage::IssueLocalStatement(
session_index,
candidate_hash,
candidate.clone(),
false,
).into()).await;

metrics_guard.take().on_approval_invalid();
return ApprovalState::failed(
validator_index,
candidate_hash,
Expand Down Expand Up @@ -2267,7 +2300,7 @@ async fn launch_approval(

// Issue and import a local approval vote. Should only be invoked after approval checks
// have been done.
fn issue_approval(
async fn issue_approval(
ctx: &mut impl SubsystemSender,
state: &State<impl DBReader>,
metrics: &Metrics,
Expand Down Expand Up @@ -2362,26 +2395,37 @@ fn issue_approval(
}
};

let session = block_entry.session();
let sig = match sign_approval(
&state.keystore,
&validator_pubkey,
candidate_hash,
block_entry.session(),
session,
) {
Some(sig) => sig,
None => {
tracing::warn!(
target: LOG_TARGET,
"Could not issue approval signature with validator index {} in session {}. Assignment key present but not validator key?",
validator_index.0,
block_entry.session(),
validator_index = ?validator_index,
session,
"Could not issue approval signature. Assignment key present but not validator key?",
);

metrics.on_approval_error();
return Ok(Vec::new());
}
};

// Record our statement in the dispute coordinator for later
// participation in disputes on the same candidate.
let signed_dispute_statement = SignedDisputeStatement::new_checked(
DisputeStatement::Valid(ValidDisputeStatementKind::ApprovalChecking),
candidate_hash,
session,
validator_pubkey.clone(),
sig.clone(),
).expect("Statement just signed; should pass checks; qed");

tracing::debug!(
target: LOG_TARGET,
?candidate_hash,
Expand All @@ -2390,6 +2434,7 @@ fn issue_approval(
"Issuing approval vote",
);

let candidate_receipt = candidate_entry.candidate_receipt().clone();
let actions = import_checked_approval(
state,
metrics,
Expand All @@ -2411,6 +2456,14 @@ fn issue_approval(
}
).into());

// dispatch to dispute coordintaor.
ctx.send_message(DisputeCoordinatorMessage::ImportStatements {
candidate_hash,
candidate_receipt,
session,
statements: vec![(signed_dispute_statement, validator_index)],
}.into()).await;

Ok(actions)
}

Expand Down
65 changes: 62 additions & 3 deletions node/core/backing/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,10 @@ use polkadot_primitives::v1::{
BackedCandidate, CandidateCommitments, CandidateDescriptor, CandidateHash,
CandidateReceipt, CollatorId, CommittedCandidateReceipt, CoreIndex, CoreState, Hash, Id as ParaId,
SigningContext, ValidatorId, ValidatorIndex, ValidatorSignature, ValidityAttestation,
SessionIndex,
};
use polkadot_node_primitives::{
Statement, SignedFullStatement, ValidationResult, PoV, AvailableData,
Statement, SignedFullStatement, ValidationResult, PoV, AvailableData, SignedDisputeStatement,
};
use polkadot_subsystem::{
PerLeafSpan, Stage, SubsystemSender,
Expand All @@ -41,7 +42,7 @@ use polkadot_subsystem::{
AllMessages, AvailabilityDistributionMessage, AvailabilityStoreMessage,
CandidateBackingMessage, CandidateValidationMessage, CollatorProtocolMessage,
ProvisionableData, ProvisionerMessage, RuntimeApiRequest,
StatementDistributionMessage, ValidationFailed
StatementDistributionMessage, ValidationFailed, DisputeCoordinatorMessage,
}
};
use polkadot_node_subsystem_util::{
Expand Down Expand Up @@ -150,6 +151,8 @@ impl ValidatedCandidateCommand {
pub struct CandidateBackingJob {
/// The hash of the relay parent on top of which this job is doing it's work.
parent: Hash,
/// The session index this corresponds to.
session_index: SessionIndex,
/// The `ParaId` assigned to this validator
assignment: Option<ParaId>,
/// The collator required to author the candidate, if any.
Expand Down Expand Up @@ -768,12 +771,67 @@ impl CandidateBackingJob {
"Importing statement",
);

let candidate_hash = statement.payload().candidate_hash();
let import_statement_span = {
// create a span only for candidates we're already aware of.
let candidate_hash = statement.payload().candidate_hash();
self.get_unbacked_statement_child(root_span, candidate_hash, statement.validator_index())
};

// Dispatch the statement to the dispute coordinator.
{
let validator_index = statement.validator_index();
let signing_context = SigningContext {
parent_hash: self.parent,
session_index: self.session_index,
};

let validator_public = match self.table_context
.validators
.get(validator_index.0 as usize)
{
None => {
tracing::warn!(
target: LOG_TARGET,
session_index = ?self.session_index,
relay_parent = ?self.parent,
?validator_index,
"Supposedly 'Signed' statement has validator index out of bounds."
);

return Ok(None);
}
Some(v) => v,
};

let maybe_candidate_receipt = match statement.payload() {
Statement::Seconded(receipt) => Some(receipt.to_plain()),
Statement::Valid(candidate_hash) => {
// Valid statements are only supposed to be imported
// once we've seen at least one `Seconded` statement.
self.table.get_candidate(&candidate_hash).map(|c| c.to_plain())
}
};

let maybe_signed_dispute_statement = SignedDisputeStatement::from_backing_statement(
statement.as_unchecked(),
signing_context,
validator_public.clone(),
).ok();

if let (Some(candidate_receipt), Some(dispute_statement))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

= (maybe_candidate_receipt, maybe_signed_dispute_statement)
{
sender.send_message(
DisputeCoordinatorMessage::ImportStatements {
candidate_hash,
candidate_receipt,
session: self.session_index,
statements: vec![(dispute_statement, validator_index)],
}.into()
).await;
}
}
rphmeier marked this conversation as resolved.
Show resolved Hide resolved

let stmt = primitive_statement_to_table(statement);

let summary = self.table.import_statement(&self.table_context, stmt);
Expand Down Expand Up @@ -1199,6 +1257,7 @@ impl util::JobTrait for CandidateBackingJob {
let (background_tx, background_rx) = mpsc::channel(16);
let job = CandidateBackingJob {
parent,
session_index,
assignment,
required_collator,
issued_statements: HashSet::new(),
Expand Down
Loading