Skip to content

Commit

Permalink
Represent small values as single bytes
Browse files Browse the repository at this point in the history
This change leverages the SB/LB instructions to change the memory
representation of all small enough values to make them fit in a single
byte instead of a full word.

The type size and passing calculations have been changed to align
elements of structs and enums to full words.

Structs and data section entries are filled with right-padding to align
their elements to words. Enums are still left-padded.
The Data section generation has been refactored to allow for these two
padding modes.

Arrays and slices contain no inner padding, so byte sequences are now
properly consecutive and packed. As a whole, however, they may be
right-padded in certain circumstances to maintain word alignment.

Direct usages of LW/SW have been changed to LB/SB where appropriate.

The LWDataId virtual instruction has been changed to LoadDataId to
better represent the fact that it can load both word and byte sized
values.
  • Loading branch information
IGI-111 committed Aug 9, 2023
1 parent e625a1c commit fddc699
Show file tree
Hide file tree
Showing 21 changed files with 461 additions and 227 deletions.
2 changes: 1 addition & 1 deletion sway-core/src/asm_generation/finalized_asm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ fn to_bytecode_mut(
// Some LWs are expanded into two ops to allow for data larger than one word, so we calculate
// exactly how many ops will be generated to calculate the offset.
let offset_to_data_section_in_bytes = ops.iter().fold(0, |acc, item| match &item.opcode {
AllocatedOpcode::LWDataId(_reg, data_label)
AllocatedOpcode::LoadDataId(_reg, data_label)
if !data_section
.has_copy_type(data_label)
.expect("data label references non existent data -- internal error") =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -348,7 +348,7 @@ impl AllocatedAbstractInstructionSet {
let data_id =
data_section.insert_data_value(Entry::new_word(offset, None, None));
realized_ops.push(RealizedOp {
opcode: AllocatedOpcode::LWDataId(r1, data_id),
opcode: AllocatedOpcode::LoadDataId(r1, data_id),
owning_span,
comment,
});
Expand Down Expand Up @@ -414,7 +414,7 @@ impl AllocatedAbstractInstructionSet {
Either::Right(Label(_)) => 0,

// A special case for LoadDataId which may be 1 or 2 ops, depending on the source size.
Either::Left(AllocatedOpcode::LWDataId(_, ref data_id)) => {
Either::Left(AllocatedOpcode::LoadDataId(_, ref data_id)) => {
let has_copy_type = data_section.has_copy_type(data_id).expect(
"Internal miscalculation in data section -- \
data id did not match up to any actual data",
Expand Down
148 changes: 114 additions & 34 deletions sway-core/src/asm_generation/fuel/data_section.rs
Original file line number Diff line number Diff line change
@@ -1,61 +1,90 @@
use crate::asm_generation::from_ir::ir_type_size_in_bytes;
use crate::{
asm_generation::from_ir::ir_type_size_in_bytes, size_bytes_round_up_to_word_alignment,
};

use sway_ir::{Constant, ConstantValue, Context};

use std::{
collections::BTreeMap,
fmt::{self, Write},
iter::repeat,
};

// An entry in the data section. It's important for the size to be correct, especially for unions
// where the size could be larger than the represented value.
#[derive(Clone, Debug)]
pub struct Entry {
value: Datum,
size: usize,
padding: Padding,
// It is assumed, for now, that only configuration-time constants have a name. Otherwise, this
// is `None`.
name: Option<String>,
}

/// The raw value stored in a data section [`Entry`].
#[derive(Clone, Debug)]
pub enum Datum {
/// A single byte value.
Byte(u8),
/// A 64-bit word value.
Word(u64),
/// A raw sequence of bytes.
ByteArray(Vec<u8>),
/// A sequence of nested entries, each carrying its own padding.
Collection(Vec<Entry>),
}

/// Describes on which side an entry is zero-padded and the size (in bytes)
/// that the padded entry is expected to reach.
#[derive(Clone, Debug)]
pub(crate) enum Padding {
    /// Padding bytes go before the value.
    Left { target_size: usize },
    /// Padding bytes go after the value.
    Right { target_size: usize },
}

impl Padding {
    /// Returns the requested size in bytes, regardless of the padding side.
    pub fn target_size(&self) -> usize {
        match *self {
            Self::Left { target_size } => target_size,
            Self::Right { target_size } => target_size,
        }
    }
}

impl Entry {
pub(crate) fn new_word(value: u64, size: Option<usize>, name: Option<String>) -> Entry {
/// Builds an entry holding a single byte.
///
/// When `padding` is `None`, the entry defaults to right padding with a
/// target size of one byte.
pub(crate) fn new_byte(value: u8, name: Option<String>, padding: Option<Padding>) -> Entry {
    let padding = padding.unwrap_or(Padding::Right { target_size: 1 });
    let value = Datum::Byte(value);
    Entry {
        value,
        padding,
        name,
    }
}

pub(crate) fn new_word(value: u64, name: Option<String>, padding: Option<Padding>) -> Entry {
Entry {
value: Datum::Word(value),
size: size.unwrap_or(8),
padding: padding.unwrap_or(Padding::Right { target_size: 8 }),
name,
}
}

pub(crate) fn new_byte_array(
bytes: Vec<u8>,
size: Option<usize>,
name: Option<String>,
padding: Option<Padding>,
) -> Entry {
let size = size.unwrap_or(bytes.len());
Entry {
padding: padding.unwrap_or(Padding::Right {
target_size: bytes.len(),
}),
value: Datum::ByteArray(bytes),
size,
name,
}
}

pub(crate) fn new_collection(
elements: Vec<Entry>,
size: Option<usize>,
name: Option<String>,
padding: Option<Padding>,
) -> Entry {
let size = size.unwrap_or_else(|| elements.iter().map(|el| el.size).sum());
Entry {
padding: padding.unwrap_or(Padding::Right {
target_size: elements.iter().map(|el| el.padding.target_size()).sum(),
}),
value: Datum::Collection(elements),
size,
name,
}
}
Expand All @@ -64,11 +93,11 @@ impl Entry {
context: &Context,
constant: &Constant,
name: Option<String>,
padding: Option<Padding>,
) -> Entry {
// We have to do some painful special handling here for enums, which are tagged unions.
// This really should be handled by the IR more explicitly and is something that will
// hopefully be addressed by https://github.com/FuelLabs/sway/issues/2819#issuecomment-1256930392
let size = Some(ir_type_size_in_bytes(context, &constant.ty) as usize);

// Is this constant a tagged union?
if constant.ty.is_struct(context) {
Expand All @@ -81,43 +110,78 @@ impl Entry {
// we use unions (otherwise we should be generalising this a bit more).
if let ConstantValue::Struct(els) = &constant.value {
if els.len() == 2 {
let tag_entry = Entry::from_constant(context, &els[0], None);
let tag_entry = Entry::from_constant(context, &els[0], None, None);

// Here's the special case. We need to get the size of the union and
// attach it to this constant entry which will be one of the variants.
let mut val_entry = Entry::from_constant(context, &els[1], None);
val_entry.size = ir_type_size_in_bytes(context, &field_tys[1]) as usize;
let val_entry = {
let target_size = size_bytes_round_up_to_word_alignment!(
ir_type_size_in_bytes(context, &field_tys[1]) as usize
);
Entry::from_constant(
context,
&els[1],
None,
Some(Padding::Left { target_size }),
)
};

// Return here from our special case.
return Entry::new_collection(vec![tag_entry, val_entry], size, name);
return Entry::new_collection(vec![tag_entry, val_entry], name, padding);
}
}
}
};

// Not a tagged union, no trickiness required.
match &constant.value {
ConstantValue::Undef | ConstantValue::Unit => Entry::new_word(0, size, name),
ConstantValue::Bool(b) => Entry::new_word(u64::from(*b), size, name),
ConstantValue::Uint(u) => Entry::new_word(*u, size, name),
ConstantValue::U256(u) => Entry::new_byte_array(u.to_be_bytes().to_vec(), size, name),
ConstantValue::B256(bs) => Entry::new_byte_array(bs.to_vec(), size, name),
ConstantValue::String(bs) => Entry::new_byte_array(bs.clone(), size, name),

ConstantValue::Array(els) | ConstantValue::Struct(els) => Entry::new_collection(
ConstantValue::Undef | ConstantValue::Unit => Entry::new_byte(0, name, padding),
ConstantValue::Bool(b) => Entry::new_byte(u8::from(*b), name, padding),
ConstantValue::Uint(u) => {
if constant.ty.is_uint8(context) {
Entry::new_byte(*u as u8, name, padding)
} else {
Entry::new_word(*u, name, padding)
}
}
ConstantValue::U256(u) => {
Entry::new_byte_array(u.to_be_bytes().to_vec(), name, padding)
}
ConstantValue::B256(bs) => Entry::new_byte_array(bs.to_vec(), name, padding),
ConstantValue::String(bs) => Entry::new_byte_array(bs.clone(), name, padding),

ConstantValue::Array(els) => Entry::new_collection(
els.iter()
.map(|el| Entry::from_constant(context, el, None, None))
.collect(),
name,
padding,
),
ConstantValue::Struct(els) => Entry::new_collection(
els.iter()
.map(|el| Entry::from_constant(context, el, None))
.map(|el| {
let target_size = size_bytes_round_up_to_word_alignment!(
ir_type_size_in_bytes(context, &el.ty) as usize
);
Entry::from_constant(
context,
el,
None,
Some(Padding::Right { target_size }),
)
})
.collect(),
size,
name,
padding,
),
}
}

/// Converts a literal to a big-endian representation. This is padded to words.
pub(crate) fn to_bytes(&self) -> Vec<u8> {
// Get the big-endian byte representation of the basic value.
let mut bytes = match &self.value {
let bytes = match &self.value {
Datum::Byte(b) => vec![*b],
Datum::Word(w) => w.to_be_bytes().to_vec(),
Datum::ByteArray(bs) if bs.len() % 8 == 0 => bs.clone(),
Datum::ByteArray(bs) => bs
Expand All @@ -129,18 +193,26 @@ impl Entry {
Datum::Collection(els) => els.iter().flat_map(|el| el.to_bytes()).collect(),
};

// Pad the size out to match the specified size.
if self.size > bytes.len() {
let mut pad = vec![0; self.size - bytes.len()];
pad.append(&mut bytes);
bytes = pad;
match self.padding {
Padding::Left { target_size } => {
let target_size = size_bytes_round_up_to_word_alignment!(target_size);
let left_pad = target_size.saturating_sub(bytes.len());
[repeat(0u8).take(left_pad).collect(), bytes].concat()
}
Padding::Right { target_size } => {
let target_size = size_bytes_round_up_to_word_alignment!(target_size);
let right_pad = target_size.saturating_sub(bytes.len());
[bytes, repeat(0u8).take(right_pad).collect()].concat()
}
}

bytes
}

pub(crate) fn has_copy_type(&self) -> bool {
matches!(self.value, Datum::Word(_))
matches!(self.value, Datum::Word(_) | Datum::Byte(_))
}

/// Returns `true` when this entry's value is a single byte ([`Datum::Byte`]).
pub(crate) fn is_byte(&self) -> bool {
    match &self.value {
        Datum::Byte(_) => true,
        Datum::Word(_) | Datum::ByteArray(_) | Datum::Collection(_) => false,
    }
}

pub(crate) fn equiv(&self, entry: &Entry) -> bool {
Expand Down Expand Up @@ -218,6 +290,13 @@ impl DataSection {
.map(|entry| entry.has_copy_type())
}

/// Reports whether the entry referenced by the given [DataId] holds a byte value.
///
/// Returns `None` when `id` does not index an existing entry.
pub(crate) fn is_byte(&self, id: &DataId) -> Option<bool> {
    let entry = self.value_pairs.get(id.0 as usize)?;
    Some(entry.is_byte())
}

/// When generating code, sometimes a hard-coded data pointer is needed to reference
/// static values that have a length longer than one word.
/// This method appends pointers to the end of the data section (thus, not altering the data
Expand Down Expand Up @@ -253,6 +332,7 @@ impl fmt::Display for DataSection {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fn display_entry(datum: &Datum) -> String {
match datum {
Datum::Byte(w) => format!(".byte {w}"),
Datum::Word(w) => format!(".word {w}"),
Datum::ByteArray(bs) => {
let mut hex_str = String::new();
Expand Down
Loading

0 comments on commit fddc699

Please sign in to comment.