Represent small values as single bytes

This change leverages the SB/LB instructions to change the memory representation of all small enough values to make them fit in a single byte instead of a full word. The type size and passing calculations have been changed to align elements of structs and enums to full words. Structs and data section entries are filled with right-padding to align their elements to words. Enums are still left-padded. The Data section generation has been refactored to allow for these two padding modes. Arrays and slices contain no inner padding, byte sequences will now be properly consecutive and packed. Though, as a whole, they may be right padded in certain circumstances to maintain word alignment. Direct usages of LW/SW have been changed to LB/SB where appropriate. The LWDataId virtual instruction has been changed to LoadDataId to better represent the fact that it can load both word and byte sized values.
FuelLabs · Aug 16, 2023 · 65d5718 · 65d5718
1 parent e625a1c
commit 65d5718
Show file tree

Hide file tree

Showing 25 changed files with 1,157 additions and 521 deletions.
diff --git a/sway-core/src/asm_generation/finalized_asm.rs b/sway-core/src/asm_generation/finalized_asm.rs
@@ -118,7 +118,7 @@ fn to_bytecode_mut(
     // Some LWs are expanded into two ops to allow for data larger than one word, so we calculate
     // exactly how many ops will be generated to calculate the offset.
     let offset_to_data_section_in_bytes = ops.iter().fold(0, |acc, item| match &item.opcode {
-        AllocatedOpcode::LWDataId(_reg, data_label)
+        AllocatedOpcode::LoadDataId(_reg, data_label)
             if !data_section
                 .has_copy_type(data_label)
                 .expect("data label references non existent data -- internal error") =>

diff --git a/sway-core/src/asm_generation/fuel/allocated_abstract_instruction_set.rs b/sway-core/src/asm_generation/fuel/allocated_abstract_instruction_set.rs
@@ -348,7 +348,7 @@ impl AllocatedAbstractInstructionSet {
                         let data_id =
                             data_section.insert_data_value(Entry::new_word(offset, None, None));
                         realized_ops.push(RealizedOp {
-                            opcode: AllocatedOpcode::LWDataId(r1, data_id),
+                            opcode: AllocatedOpcode::LoadDataId(r1, data_id),
                             owning_span,
                             comment,
                         });
@@ -414,7 +414,7 @@ impl AllocatedAbstractInstructionSet {
             Either::Right(Label(_)) => 0,
 
             // A special case for LWDataId which may be 1 or 2 ops, depending on the source size.
-            Either::Left(AllocatedOpcode::LWDataId(_, ref data_id)) => {
+            Either::Left(AllocatedOpcode::LoadDataId(_, ref data_id)) => {
                 let has_copy_type = data_section.has_copy_type(data_id).expect(
                     "Internal miscalculation in data section -- \
                         data id did not match up to any actual data",

diff --git a/sway-core/src/asm_generation/fuel/data_section.rs b/sway-core/src/asm_generation/fuel/data_section.rs
@@ -1,61 +1,90 @@
-use crate::asm_generation::from_ir::ir_type_size_in_bytes;
+use crate::{
+    asm_generation::from_ir::ir_type_size_in_bytes, size_bytes_round_up_to_word_alignment,
+};
 
 use sway_ir::{Constant, ConstantValue, Context};
 
 use std::{
     collections::BTreeMap,
     fmt::{self, Write},
+    iter::repeat,
 };
 
 // An entry in the data section.  It's important for the size to be correct, especially for unions
 // where the size could be larger than the represented value.
 #[derive(Clone, Debug)]
 pub struct Entry {
     value: Datum,
-    size: usize,
+    padding: Padding,
     // It is assumed, for now, that only configuration-time constants have a name. Otherwise, this
     // is `None`.
     name: Option<String>,
 }
 
 #[derive(Clone, Debug)]
 pub enum Datum {
+    Byte(u8),
     Word(u64),
     ByteArray(Vec<u8>),
     Collection(Vec<Entry>),
 }
 
+#[derive(Clone, Debug)]
+pub(crate) enum Padding {
+    Left { target_size: usize },
+    Right { target_size: usize },
+}
+
+impl Padding {
+    pub fn target_size(&self) -> usize {
+        use Padding::*;
+        match self {
+            Left { target_size } | Right { target_size } => *target_size,
+        }
+    }
+}
+
 impl Entry {
-    pub(crate) fn new_word(value: u64, size: Option<usize>, name: Option<String>) -> Entry {
+    pub(crate) fn new_byte(value: u8, name: Option<String>, padding: Option<Padding>) -> Entry {
+        Entry {
+            value: Datum::Byte(value),
+            padding: padding.unwrap_or(Padding::Right { target_size: 1 }),
+            name,
+        }
+    }
+
+    pub(crate) fn new_word(value: u64, name: Option<String>, padding: Option<Padding>) -> Entry {
         Entry {
             value: Datum::Word(value),
-            size: size.unwrap_or(8),
+            padding: padding.unwrap_or(Padding::Right { target_size: 8 }),
             name,
         }
     }
 
     pub(crate) fn new_byte_array(
         bytes: Vec<u8>,
-        size: Option<usize>,
         name: Option<String>,
+        padding: Option<Padding>,
     ) -> Entry {
-        let size = size.unwrap_or(bytes.len());
         Entry {
+            padding: padding.unwrap_or(Padding::Right {
+                target_size: bytes.len(),
+            }),
             value: Datum::ByteArray(bytes),
-            size,
             name,
         }
     }
 
     pub(crate) fn new_collection(
         elements: Vec<Entry>,
-        size: Option<usize>,
         name: Option<String>,
+        padding: Option<Padding>,
     ) -> Entry {
-        let size = size.unwrap_or_else(|| elements.iter().map(|el| el.size).sum());
         Entry {
+            padding: padding.unwrap_or(Padding::Right {
+                target_size: elements.iter().map(|el| el.padding.target_size()).sum(),
+            }),
             value: Datum::Collection(elements),
-            size,
             name,
         }
     }
@@ -64,11 +93,11 @@ impl Entry {
         context: &Context,
         constant: &Constant,
         name: Option<String>,
+        padding: Option<Padding>,
     ) -> Entry {
         // We have to do some painful special handling here for enums, which are tagged unions.
         // This really should be handled by the IR more explicitly and is something that will
         // hopefully be addressed by https://github.com/FuelLabs/sway/issues/2819#issuecomment-1256930392
-        let size = Some(ir_type_size_in_bytes(context, &constant.ty) as usize);
 
         // Is this constant a tagged union?
         if constant.ty.is_struct(context) {
@@ -81,43 +110,78 @@ impl Entry {
                 // we use unions (otherwise we should be generalising this a bit more).
                 if let ConstantValue::Struct(els) = &constant.value {
                     if els.len() == 2 {
-                        let tag_entry = Entry::from_constant(context, &els[0], None);
+                        let tag_entry = Entry::from_constant(context, &els[0], None, None);
 
                         // Here's the special case.  We need to get the size of the union and
                         // attach it to this constant entry which will be one of the variants.
-                        let mut val_entry = Entry::from_constant(context, &els[1], None);
-                        val_entry.size = ir_type_size_in_bytes(context, &field_tys[1]) as usize;
+                        let val_entry = {
+                            let target_size = size_bytes_round_up_to_word_alignment!(
+                                ir_type_size_in_bytes(context, &field_tys[1]) as usize
+                            );
+                            Entry::from_constant(
+                                context,
+                                &els[1],
+                                None,
+                                Some(Padding::Left { target_size }),
+                            )
+                        };
 
                         // Return here from our special case.
-                        return Entry::new_collection(vec![tag_entry, val_entry], size, name);
+                        return Entry::new_collection(vec![tag_entry, val_entry], name, padding);
                     }
                 }
             }
         };
 
         // Not a tagged union, no trickiness required.
         match &constant.value {
-            ConstantValue::Undef | ConstantValue::Unit => Entry::new_word(0, size, name),
-            ConstantValue::Bool(b) => Entry::new_word(u64::from(*b), size, name),
-            ConstantValue::Uint(u) => Entry::new_word(*u, size, name),
-            ConstantValue::U256(u) => Entry::new_byte_array(u.to_be_bytes().to_vec(), size, name),
-            ConstantValue::B256(bs) => Entry::new_byte_array(bs.to_vec(), size, name),
-            ConstantValue::String(bs) => Entry::new_byte_array(bs.clone(), size, name),
-
-            ConstantValue::Array(els) | ConstantValue::Struct(els) => Entry::new_collection(
+            ConstantValue::Undef | ConstantValue::Unit => Entry::new_byte(0, name, padding),
+            ConstantValue::Bool(b) => Entry::new_byte(u8::from(*b), name, padding),
+            ConstantValue::Uint(u) => {
+                if constant.ty.is_uint8(context) {
+                    Entry::new_byte(*u as u8, name, padding)
+                } else {
+                    Entry::new_word(*u, name, padding)
+                }
+            }
+            ConstantValue::U256(u) => {
+                Entry::new_byte_array(u.to_be_bytes().to_vec(), name, padding)
+            }
+            ConstantValue::B256(bs) => Entry::new_byte_array(bs.to_vec(), name, padding),
+            ConstantValue::String(bs) => Entry::new_byte_array(bs.clone(), name, padding),
+
+            ConstantValue::Array(els) => Entry::new_collection(
+                els.iter()
+                    .map(|el| Entry::from_constant(context, el, None, None))
+                    .collect(),
+                name,
+                padding,
+            ),
+            ConstantValue::Struct(els) => Entry::new_collection(
                 els.iter()
-                    .map(|el| Entry::from_constant(context, el, None))
+                    .map(|el| {
+                        let target_size = size_bytes_round_up_to_word_alignment!(
+                            ir_type_size_in_bytes(context, &el.ty) as usize
+                        );
+                        Entry::from_constant(
+                            context,
+                            el,
+                            None,
+                            Some(Padding::Right { target_size }),
+                        )
+                    })
                     .collect(),
-                size,
                 name,
+                padding,
             ),
         }
     }
 
     /// Converts a literal to a big-endian representation. This is padded to words.
     pub(crate) fn to_bytes(&self) -> Vec<u8> {
         // Get the big-endian byte representation of the basic value.
-        let mut bytes = match &self.value {
+        let bytes = match &self.value {
+            Datum::Byte(b) => vec![*b],
             Datum::Word(w) => w.to_be_bytes().to_vec(),
             Datum::ByteArray(bs) if bs.len() % 8 == 0 => bs.clone(),
             Datum::ByteArray(bs) => bs
@@ -129,18 +193,26 @@ impl Entry {
             Datum::Collection(els) => els.iter().flat_map(|el| el.to_bytes()).collect(),
         };
 
-        // Pad the size out to match the specified size.
-        if self.size > bytes.len() {
-            let mut pad = vec![0; self.size - bytes.len()];
-            pad.append(&mut bytes);
-            bytes = pad;
+        match self.padding {
+            Padding::Left { target_size } => {
+                let target_size = size_bytes_round_up_to_word_alignment!(target_size);
+                let left_pad = target_size.saturating_sub(bytes.len());
+                [repeat(0u8).take(left_pad).collect(), bytes].concat()
+            }
+            Padding::Right { target_size } => {
+                let target_size = size_bytes_round_up_to_word_alignment!(target_size);
+                let right_pad = target_size.saturating_sub(bytes.len());
+                [bytes, repeat(0u8).take(right_pad).collect()].concat()
+            }
         }
-
-        bytes
     }
 
     pub(crate) fn has_copy_type(&self) -> bool {
-        matches!(self.value, Datum::Word(_))
+        matches!(self.value, Datum::Word(_) | Datum::Byte(_))
+    }
+
+    pub(crate) fn is_byte(&self) -> bool {
+        matches!(self.value, Datum::Byte(_))
     }
 
     pub(crate) fn equiv(&self, entry: &Entry) -> bool {
@@ -218,6 +290,13 @@ impl DataSection {
             .map(|entry| entry.has_copy_type())
     }
 
+    /// Returns whether a specific [DataId] value has a byte entry.
+    pub(crate) fn is_byte(&self, id: &DataId) -> Option<bool> {
+        self.value_pairs
+            .get(id.0 as usize)
+            .map(|entry| entry.is_byte())
+    }
+
     /// When generating code, sometimes a hard-coded data pointer is needed to reference
     /// static values that have a length longer than one word.
     /// This method appends pointers to the end of the data section (thus, not altering the data
@@ -253,6 +332,7 @@ impl fmt::Display for DataSection {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         fn display_entry(datum: &Datum) -> String {
             match datum {
+                Datum::Byte(w) => format!(".byte {w}"),
                 Datum::Word(w) => format!(".word {w}"),
                 Datum::ByteArray(bs) => {
                     let mut hex_str = String::new();