Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: allow M1 building by removing asm sha on aarch64 #1444

Closed
wants to merge 8 commits into from
Closed
49 changes: 49 additions & 0 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ executors:
image: ubuntu-1604-cuda-10.1:201909-23
working_directory: ~/gpuci
resource_class: gpu.nvidia.medium
arm:
machine:
image: ubuntu-2004:202101-01
resource_class: arm.large

setup-env: &setup-env
FIL_PROOFS_PARAMETER_CACHE: "/tmp/filecoin-proof-parameters/"
Expand Down Expand Up @@ -207,6 +211,38 @@ jobs:
cargo +<< pipeline.parameters.nightly-toolchain >> -Zpackage-features test --all --verbose --no-default-features --features << parameters.features >>
no_output_timeout: 30m

test_arm_no_gpu:
executor: arm
environment: *setup-env
parameters:
features:
type: string
steps:
- checkout
- attach_workspace:
at: "."
- restore_rustup_cache
- restore_parameter_cache
- run:
name: Install Rust
command: |
curl https://sh.rustup.rs -sSf | sh -s -- -y
- run: rustup install $(cat rust-toolchain)
- run: rustup default $(cat rust-toolchain)
- run: rustup install << pipeline.parameters.nightly-toolchain >>
- run: cargo update
- run: cargo fetch
- run:
name: Install required libraries
command: |
sudo apt-get update -y
sudo apt install -y libhwloc-dev
- run:
name: Test arm with no gpu (<< parameters.features >>)
command: |
cargo +<< pipeline.parameters.nightly-toolchain >> -Zpackage-features test --release --all --verbose --no-default-features --features << parameters.features >>
no_output_timeout: 90m

test_blst:
executor: default
environment: *setup-env
Expand Down Expand Up @@ -584,6 +620,19 @@ workflows:
- cargo_fetch
- ensure_groth_parameters_and_keys_linux

- test_arm_no_gpu:
name: test_arm_no_gpu_pairing
features: 'pairing'
requires:
- cargo_fetch
- ensure_groth_parameters_and_keys_linux

- test_arm_no_gpu:
name: test_arm_no_gpu_blst
features: 'blst'
requires:
- cargo_fetch
- ensure_groth_parameters_and_keys_linux

- bench:
requires:
Expand Down
23 changes: 18 additions & 5 deletions fr32/src/reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,17 @@ const NUM_U128S_PER_BLOCK: usize = NUM_BYTES_OUT_BLOCK / size_of::<u128>();

const MASK_SKIP_HIGH_2: u128 = 0b0011_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111_1111;

#[repr(align(16))]
dignifiedquire marked this conversation as resolved.
Show resolved Hide resolved
struct AlignedBuffer([u8; NUM_BYTES_IN_BLOCK + 1]);

/// A reader (`io::Read`) that converts unpadded input into valid `Fr32` padded output.
pub struct Fr32Reader<R> {
/// The source being padded.
source: R,
/// Currently read block.
/// This is padded to 128 bytes to allow reading all values as `u128`s, but only the first
/// 127 bytes are ever valid.
in_buffer: [u8; NUM_BYTES_IN_BLOCK + 1],
in_buffer: AlignedBuffer,
/// Currently writing out block.
out_buffer: [u128; NUM_U128S_PER_BLOCK],
/// The current offset into the `out_buffer` in bytes.
Expand Down Expand Up @@ -55,7 +58,7 @@ impl<R: Read> Fr32Reader<R> {
pub fn new(source: R) -> Self {
Fr32Reader {
source,
in_buffer: [0; NUM_BYTES_IN_BLOCK + 1],
in_buffer: AlignedBuffer([0; NUM_BYTES_IN_BLOCK + 1]),
out_buffer: [0; NUM_U128S_PER_BLOCK],
out_offset: 0,
available_frs: 0,
Expand All @@ -65,7 +68,17 @@ impl<R: Read> Fr32Reader<R> {

/// Processes a single block in in_buffer, writing the result to out_buffer.
fn process_block(&mut self) {
let in_buffer: &[u128] = self.in_buffer.as_slice_of::<u128>().unwrap();
let in_buffer: &[u128] = {
#[cfg(target_arch = "aarch64")]
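// Safety: `AlignedBuffer` is `#[repr(align(16))]`, so the byte array starts
// on a 16-byte boundary and can therefore be cast from `u8` to `u128`
// without alignment issues.
// NOTE(review): a raw `[u8]` -> `[u128]` slice-pointer cast keeps the element
// count (128) rather than the byte length — confirm the slice length is intended.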
unsafe {
dignifiedquire marked this conversation as resolved.
Show resolved Hide resolved
&mut (*(&self.in_buffer.0 as *const [u8] as *mut [u128]))
}
#[cfg(not(target_arch = "aarch64"))]
self.in_buffer.0.as_slice_of::<u128>().unwrap()
};
let out = &mut self.out_buffer;

// 0..254
Expand All @@ -86,7 +99,7 @@ impl<R: Read> Fr32Reader<R> {

fn fill_in_buffer(&mut self) -> io::Result<usize> {
let mut bytes_read = 0;
let mut buf = &mut self.in_buffer[..NUM_BYTES_IN_BLOCK];
let mut buf = &mut self.in_buffer.0[..NUM_BYTES_IN_BLOCK];

while !buf.is_empty() {
match self.source.read(buf) {
Expand All @@ -103,7 +116,7 @@ impl<R: Read> Fr32Reader<R> {
}

// Clear unfilled memory.
for val in &mut self.in_buffer[bytes_read..NUM_BYTES_IN_BLOCK] {
for val in &mut self.in_buffer.0[bytes_read..NUM_BYTES_IN_BLOCK] {
*val = 0;
}

Expand Down
9 changes: 8 additions & 1 deletion sha2raw/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,14 @@ rand = "0.7.3"
rand_xorshift = "0.2.0"

[features]

[target.'cfg(all(target_arch = "aarch64", not(target_os = "macos")))'.features]
default = ["asm"]
[target.'cfg(not(target_arch = "aarch64"))'.features]
default = ["asm"]
asm = ["sha2-asm"]
[target.'cfg(all(target_arch = "aarch64", target_os = "macos"))'.features]
default = []

asm = ["sha2-asm", "asm"]


4 changes: 3 additions & 1 deletion sha2raw/src/platform.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
use crate::{sha256_intrinsics, sha256_utils};
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
use crate::sha256_intrinsics;
use crate::sha256_utils;

#[allow(dead_code)]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
Expand Down
1 change: 1 addition & 0 deletions storage-proofs-core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#![warn(clippy::ptr_arg)]
#![warn(clippy::unnecessary_lazy_evaluations)]
#![warn(clippy::redundant_slicing)]
#![allow(bare_trait_objects)]

use std::convert::TryInto;

Expand Down
7 changes: 4 additions & 3 deletions storage-proofs-porep/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,10 @@ libc = "0.2"
fdlimit = "0.2.0"
fr32 = { path = "../fr32", version = "^0.2.0", default-features = false }

[target."cfg(target_arch = \"aarch64\")".dependencies]
sha2 = { version = "0.9.3", features = ["compress", "asm"] }
[target."cfg(not(target_arch = \"aarch64\"))".dependencies]
#[target."cfg(target_arch = \"aarch64\")".dependencies]
#sha2 = { version = "0.9.3", features = ["compress", "asm"] }
#[target."cfg(not(target_arch = \"aarch64\"))".dependencies]
#sha2 = { version = "0.9.3", features = ["compress"] }
sha2 = { version = "0.9.3", features = ["compress"] }

[dev-dependencies]
Expand Down