From b44faff96f74c4e8ee6908d571811a6580669de5 Mon Sep 17 00:00:00 2001 From: Philip Craig Date: Tue, 20 Aug 2024 13:59:12 +1000 Subject: [PATCH] write: add `LineConvert` This allows reuse of the implementation of `LineProgram::from` as part of a more complex transformation. Operation of `LineProgram::from` is mostly unchanged. The one difference is that it now uses the string form chosen by `LineString::new` instead of copying the form of the input. --- src/read/dwarf.rs | 24 +++ src/write/dwarf.rs | 14 +- src/write/line.rs | 502 ++++++++++++++++++++++++++++++++------------- src/write/unit.rs | 4 +- 4 files changed, 397 insertions(+), 147 deletions(-) diff --git a/src/read/dwarf.rs b/src/read/dwarf.rs index c836d8457..43e99081d 100644 --- a/src/read/dwarf.rs +++ b/src/read/dwarf.rs @@ -420,6 +420,30 @@ impl Dwarf { } } + /// Return an attribute value as a string slice. + /// This only handles forms that are usable without an associated unit. + /// + /// If the attribute value is one of: + /// + /// - an inline `DW_FORM_string` string + /// - a `DW_FORM_strp` reference to an offset into the `.debug_str` section + /// - a `DW_FORM_strp_sup` reference to an offset into a supplementary + /// object file + /// - a `DW_FORM_line_strp` reference to an offset into the `.debug_line_str` + /// section + /// + /// then return the attribute's string value. Returns an error if the attribute + /// value does not have a string form, or if a string form has an invalid value. + pub fn attr_line_string(&self, attr: AttributeValue) -> Result { + match attr { + AttributeValue::String(string) => Ok(string), + AttributeValue::DebugStrRef(offset) => self.string(offset), + AttributeValue::DebugStrRefSup(offset) => self.sup_string(offset), + AttributeValue::DebugLineStrRef(offset) => self.line_string(offset), + _ => Err(Error::ExpectedStringAttributeValue), + } + } + /// Return the address at the given index. 
pub fn address(&self, unit: &Unit, index: DebugAddrIndex) -> Result { self.debug_addr diff --git a/src/write/dwarf.rs b/src/write/dwarf.rs index ea507126a..59c2a6d9a 100644 --- a/src/write/dwarf.rs +++ b/src/write/dwarf.rs @@ -2,8 +2,8 @@ use alloc::vec::Vec; use crate::common::Encoding; use crate::write::{ - AbbreviationTable, LineProgram, LineStringTable, Result, Sections, StringTable, Unit, - UnitTable, Writer, + AbbreviationTable, LineProgram, LineString, LineStringTable, Result, Sections, StringTable, + Unit, UnitTable, Writer, }; /// Writable DWARF information for more than one unit. @@ -48,6 +48,11 @@ impl Dwarf { } Ok(()) } + + /// Get a reference to the data for a line string. + pub fn get_line_string<'a>(&'a self, string: &'a LineString) -> &'a [u8] { + string.get(&self.strings, &self.line_strings) + } } /// Writable DWARF information for a single unit. @@ -102,6 +107,11 @@ impl DwarfUnit { abbrevs.write(&mut sections.debug_abbrev)?; Ok(()) } + + /// Get a reference to the data for a line string. + pub fn get_line_string<'a>(&'a self, string: &'a LineString) -> &'a [u8] { + string.get(&self.strings, &self.line_strings) + } } #[cfg(feature = "read")] diff --git a/src/write/line.rs b/src/write/line.rs index 0e5c06243..cf4c48bb7 100644 --- a/src/write/line.rs +++ b/src/write/line.rs @@ -7,7 +7,7 @@ use crate::constants; use crate::leb128; use crate::write::{ Address, DebugLineStrOffsets, DebugStrOffsets, Error, LineStringId, LineStringTable, Result, - Section, StringId, Writer, + Section, StringId, StringTable, Writer, }; /// The number assigned to the first special opcode. @@ -853,6 +853,19 @@ impl LineString { } } + /// Get a reference to the string data. 
+ pub fn get<'a>( + &'a self, + strings: &'a StringTable, + line_strings: &'a LineStringTable, + ) -> &'a [u8] { + match self { + LineString::String(val) => val, + LineString::StringRef(val) => strings.get(*val), + LineString::LineStringRef(val) => line_strings.get(*val), + } + } + fn form(&self) -> constants::DwForm { match *self { LineString::String(..) => constants::DW_FORM_string, @@ -992,6 +1005,8 @@ define_section!( "A writable `.debug_line` section." ); +#[cfg(feature = "read")] +pub use convert::*; #[cfg(feature = "read")] mod convert { use super::*; @@ -1003,177 +1018,378 @@ mod convert { /// /// Return the program and a mapping from file index to `FileId`. pub fn from>( - mut from_program: read::IncompleteLineProgram, - dwarf: &read::Dwarf, + from_program: read::IncompleteLineProgram, + from_dwarf: &read::Dwarf, line_strings: &mut write::LineStringTable, strings: &mut write::StringTable, convert_address: &dyn Fn(u64) -> Option
, ) -> ConvertResult<(LineProgram, Vec)> { + let encoding = from_program.header().encoding(); + let line_encoding = from_program.header().line_encoding(); + let comp_name = match from_program.header().file(0) { + Some(file) => Some(from_dwarf.attr_line_string(file.path_name())?), + None => None, + }; + let mut convert = LineConvert::new( + from_dwarf, + from_program, + comp_name, + encoding, + line_encoding, + line_strings, + strings, + )?; + + while let Some(row) = convert.read_row()? { + match row { + LineConvertRow::SetAddress(address, row) => { + if convert.in_sequence() { + return Err(ConvertError::InvalidAddress); + } + let address = + convert_address(address).ok_or(ConvertError::InvalidAddress)?; + convert.begin_sequence(Some(address)); + convert.generate_row(row); + } + LineConvertRow::Row(row) => { + if !convert.in_sequence() { + convert.begin_sequence(None); + } + convert.generate_row(row); + } + LineConvertRow::EndSequence(length) => { + if !convert.in_sequence() { + convert.begin_sequence(None); + } + convert.end_sequence(length); + } + } + } + Ok(convert.program()) + } + } + + /// The result of [`LineConvert::read_row`]. + #[derive(Debug)] + pub enum LineConvertRow { + /// A row that used a `DW_LNS_set_address` instruction. + /// + /// This is expected to be the first row in a sequence, + /// but [`LineConvert::read_row`] doesn't enforce that. + /// + /// This row will have its `address_offset` field set to 0. + /// All subsequent rows in the sequence will have their `address_offset` + /// field set to an offset from this address. + SetAddress(u64, LineRow), + /// A row produced by the line number program. + Row(LineRow), + /// The address offset of the end of the sequence. + EndSequence(u64), + } + + /// The result of [`LineConvert::read_sequence`]. + #[derive(Debug)] + pub struct LineConvertSequence { + /// The address of the first instruction in the sequence. + pub start: Option, + /// The offset in bytes of the next instruction after the sequence. 
+ pub length: u64, + /// The rows in the sequence. + /// + /// The `LineRow::address` fields are set to an offset from `Self::start`. + pub rows: Vec, + } + + /// The state for the conversion of a line number program. + #[derive(Debug)] + pub struct LineConvert<'a, R: Reader> { + #[allow(unused)] + from_dwarf: &'a read::Dwarf, + from_program: read::IncompleteLineProgram, + from_row: read::LineRow, + from_instructions: read::LineInstructions, + files: Vec, + dirs: Vec, + program: LineProgram, + #[allow(unused)] + line_strings: &'a mut write::LineStringTable, + #[allow(unused)] + strings: &'a mut write::StringTable, + } + + impl<'a, R: Reader + 'a> LineConvert<'a, R> { + /// Start a new conversion of a line number program. + /// + /// `encoding` and `line_encoding` apply to the converted program, and + /// may be different from the source program. + pub fn new( + from_dwarf: &'a read::Dwarf, + from_program: read::IncompleteLineProgram, + from_comp_name: Option, + encoding: Encoding, + line_encoding: LineEncoding, + line_strings: &'a mut write::LineStringTable, + strings: &'a mut write::StringTable, + ) -> ConvertResult { // Create mappings in case the source has duplicate files or directories. let mut dirs = Vec::new(); let mut files = Vec::new(); - let mut program = { - let from_header = from_program.header(); - let encoding = from_header.encoding(); + let from_header = from_program.header(); - let comp_dir = match from_header.directory(0) { - Some(comp_dir) => LineString::from(comp_dir, dwarf, line_strings, strings)?, - None => LineString::new(&[][..], encoding, line_strings), - }; + let comp_dir = match from_header.directory(0) { + Some(comp_dir) => { + Self::convert_string(comp_dir, from_dwarf, encoding, line_strings)? 
+ } + None => LineString::new(&[][..], encoding, line_strings), + }; - let comp_name = match from_header.file(0) { - Some(comp_file) => { - if comp_file.directory_index() != 0 { - return Err(ConvertError::InvalidDirectoryIndex); - } - LineString::from(comp_file.path_name(), dwarf, line_strings, strings)? - } - None => LineString::new(&[][..], encoding, line_strings), - }; + let comp_name = match from_comp_name { + Some(comp_name) => LineString::new(comp_name.to_slice()?, encoding, line_strings), + None => LineString::new(&[][..], encoding, line_strings), + }; - if from_header.line_base() > 0 { - return Err(ConvertError::InvalidLineBase); - } - let mut program = LineProgram::new( - encoding, - from_header.line_encoding(), - comp_dir, - comp_name, - None, // We'll set this later if needed when we add the file again. - ); - - if from_header.version() <= 4 { - // The first directory is implicit. - dirs.push(DirectoryId(0)); - // A file index of 0 is invalid for version <= 4, but putting - // something there makes the indexing easier. - files.push(FileId::new(0)); - } + if from_header.line_base() > 0 { + return Err(ConvertError::InvalidLineBase); + } + let mut program = LineProgram::new( + encoding, + line_encoding, + comp_dir, + comp_name, + None, // We'll set this later if needed when we add the file again. + ); - for from_dir in from_header.include_directories() { - let from_dir = - LineString::from(from_dir.clone(), dwarf, line_strings, strings)?; - dirs.push(program.add_directory(from_dir)); - } + if from_header.version() <= 4 { + // The first directory is implicit. + dirs.push(DirectoryId(0)); + // A file index of 0 is invalid for version <= 4, but putting + // something there makes the indexing easier. 
+ files.push(FileId::new(0)); + } - program.file_has_timestamp = from_header.file_has_timestamp(); - program.file_has_size = from_header.file_has_size(); - program.file_has_md5 = from_header.file_has_md5(); - program.file_has_source = from_header.file_has_source(); - for from_file in from_header.file_names().iter() { - let from_name = - LineString::from(from_file.path_name(), dwarf, line_strings, strings)?; - let from_dir = from_file.directory_index(); - if from_dir >= dirs.len() as u64 { - return Err(ConvertError::InvalidDirectoryIndex); - } - let from_dir = dirs[from_dir as usize]; - let from_info = Some(FileInfo { - timestamp: from_file.timestamp(), - size: from_file.size(), - md5: *from_file.md5(), - source: match from_file.source() { - Some(source) => { - Some(LineString::from(source, dwarf, line_strings, strings)?) - } - None => None, - }, - }); - files.push(program.add_file(from_name, from_dir, from_info)); - } + for from_attr in from_header.include_directories() { + let from_dir = + Self::convert_string(from_attr.clone(), from_dwarf, encoding, line_strings)?; + dirs.push(program.add_directory(from_dir)); + } - program - }; + program.file_has_timestamp = from_header.file_has_timestamp(); + program.file_has_size = from_header.file_has_size(); + program.file_has_md5 = from_header.file_has_md5(); + program.file_has_source = from_header.file_has_source(); + for from_file in from_header.file_names() { + let (from_name, from_dir, from_info) = + Self::convert_file(from_file, from_dwarf, &dirs, encoding, line_strings)?; + files.push(program.add_file(from_name, from_dir, from_info)); + } // We can't use the `from_program.rows()` because that wouldn't let // us preserve address relocations. 
- let mut from_row = read::LineRow::new(from_program.header()); - let mut instructions = from_program.header().instructions(); + let from_row = read::LineRow::new(from_program.header()); + let from_instructions = from_program.header().instructions(); + Ok(LineConvert { + from_dwarf, + from_program, + from_row, + from_instructions, + files, + dirs, + program, + line_strings, + strings, + }) + } + + fn convert_string( + from_attr: read::AttributeValue, + from_dwarf: &read::Dwarf, + encoding: Encoding, + line_strings: &mut write::LineStringTable, + ) -> ConvertResult { + let r = from_dwarf.attr_line_string(from_attr)?; + Ok(LineString::new(r.to_slice()?, encoding, line_strings)) + } + + fn convert_file( + from_file: &read::FileEntry, + from_dwarf: &read::Dwarf, + dirs: &[DirectoryId], + encoding: Encoding, + line_strings: &mut write::LineStringTable, + ) -> ConvertResult<(LineString, DirectoryId, Option)> { + let from_name = + Self::convert_string(from_file.path_name(), from_dwarf, encoding, line_strings)?; + let from_dir = from_file.directory_index(); + if from_dir >= dirs.len() as u64 { + return Err(ConvertError::InvalidDirectoryIndex); + } + let from_dir = dirs[from_dir as usize]; + let from_info = Some(FileInfo { + timestamp: from_file.timestamp(), + size: from_file.size(), + md5: *from_file.md5(), + source: match from_file.source() { + Some(source) => Some(Self::convert_string( + source, + from_dwarf, + encoding, + line_strings, + )?), + None => None, + }, + }); + Ok((from_name, from_dir, from_info)) + } + + /// Read the next row from the source program. + /// + /// Use [`LineConvert::generate_row`] to add the row to the program. + pub fn read_row(&mut self) -> ConvertResult> { + let mut tombstone = false; let mut address = None; - while let Some(instruction) = instructions.next_instruction(from_program.header())? 
{ + self.from_row.reset(self.from_program.header()); + while let Some(instruction) = self + .from_instructions + .next_instruction(self.from_program.header())? + { match instruction { read::LineInstruction::SetAddress(val) => { - if program.in_sequence() { - return Err(ConvertError::UnsupportedLineInstruction); - } - match convert_address(val) { - Some(val) => address = Some(val), - None => return Err(ConvertError::InvalidAddress), + // Use address 0 so that all addresses are offsets. + self.from_row.execute( + read::LineInstruction::SetAddress(0), + &mut self.from_program, + )?; + // Handle tombstones the same way that `from_row.execute` would have. + let tombstone_address = + !0 >> (64 - self.from_program.header().encoding().address_size * 8); + tombstone = val == tombstone_address; + if !tombstone { + address = Some(val); } - from_row - .execute(read::LineInstruction::SetAddress(0), &mut from_program)?; + continue; } - read::LineInstruction::DefineFile(_) => { - return Err(ConvertError::UnsupportedLineInstruction); + read::LineInstruction::DefineFile(ref from_file) => { + let (from_name, from_dir, from_info) = Self::convert_file( + from_file, + self.from_dwarf, + &self.dirs, + self.program.encoding(), + self.line_strings, + )?; + self.files + .push(self.program.add_file(from_name, from_dir, from_info)); } - _ => { - if from_row.execute(instruction, &mut from_program)? 
{ - if !program.in_sequence() { - program.begin_sequence(address); - address = None; - } - if from_row.end_sequence() { - program.end_sequence(from_row.address()); - } else { - program.row().address_offset = from_row.address(); - program.row().op_index = from_row.op_index(); - program.row().file = { - let file = from_row.file_index(); - if file >= files.len() as u64 { - return Err(ConvertError::InvalidFileIndex); - } - if file == 0 && program.version() <= 4 { - return Err(ConvertError::InvalidFileIndex); - } - files[file as usize] - }; - program.row().line = match from_row.line() { - Some(line) => line.get(), - None => 0, - }; - program.row().column = match from_row.column() { - read::ColumnType::LeftEdge => 0, - read::ColumnType::Column(val) => val.get(), - }; - program.row().discriminator = from_row.discriminator(); - program.row().is_statement = from_row.is_stmt(); - program.row().basic_block = from_row.basic_block(); - program.row().prologue_end = from_row.prologue_end(); - program.row().epilogue_begin = from_row.epilogue_begin(); - program.row().isa = from_row.isa(); - program.generate_row(); - } - from_row.reset(from_program.header()); - } + _ => {} + } + if !self.from_row.execute(instruction, &mut self.from_program)? { + // This instruction didn't generate a new row. + continue; + } + if tombstone { + // Perform any reset that was required for the tombstone row. + // Normally this is done when `read_row` is called again, but for + // tombstones we loop immediately. 
+ if self.from_row.end_sequence() { + tombstone = false; + address = None; } + self.from_row.reset(self.from_program.header()); + continue; + } + if self.from_row.end_sequence() { + return Ok(Some(LineConvertRow::EndSequence(self.from_row.address()))); + } + let row = LineRow { + address_offset: self.from_row.address(), + op_index: self.from_row.op_index(), + file: { + let file = self.from_row.file_index(); + if file >= self.files.len() as u64 { + return Err(ConvertError::InvalidFileIndex); + } + if file == 0 && self.from_program.header().version() <= 4 { + return Err(ConvertError::InvalidFileIndex); + } + self.files[file as usize] + }, + line: match self.from_row.line() { + Some(line) => line.get(), + None => 0, + }, + column: match self.from_row.column() { + read::ColumnType::LeftEdge => 0, + read::ColumnType::Column(val) => val.get(), + }, + discriminator: self.from_row.discriminator(), + is_statement: self.from_row.is_stmt(), + basic_block: self.from_row.basic_block(), + prologue_end: self.from_row.prologue_end(), + epilogue_begin: self.from_row.epilogue_begin(), + isa: self.from_row.isa(), }; + if let Some(address) = address { + return Ok(Some(LineConvertRow::SetAddress(address, row))); + } else { + return Ok(Some(LineConvertRow::Row(row))); + } } - Ok((program, files)) + Ok(None) } - } - impl LineString { - fn from>( - from_attr: read::AttributeValue, - dwarf: &read::Dwarf, - line_strings: &mut write::LineStringTable, - strings: &mut write::StringTable, - ) -> ConvertResult { - Ok(match from_attr { - read::AttributeValue::String(r) => LineString::String(r.to_slice()?.to_vec()), - read::AttributeValue::DebugStrRef(offset) => { - let r = dwarf.debug_str.get_str(offset)?; - let id = strings.add(r.to_slice()?); - LineString::StringRef(id) - } - read::AttributeValue::DebugLineStrRef(offset) => { - let r = dwarf.debug_line_str.get_str(offset)?; - let id = line_strings.add(r.to_slice()?); - LineString::LineStringRef(id) + /// Read the next sequence from the source 
program. + pub fn read_sequence(&mut self) -> ConvertResult> { + let mut start = None; + let mut rows = Vec::new(); + while let Some(row) = self.read_row()? { + match row { + LineConvertRow::SetAddress(address, row) => { + // We only support setting the address for the first row in a sequence. + if !rows.is_empty() { + return Err(ConvertError::InvalidAddress); + } + start = Some(address); + rows.push(row); + } + LineConvertRow::Row(row) => { + rows.push(row); + } + LineConvertRow::EndSequence(length) => { + return Ok(Some(LineConvertSequence { + start, + length, + rows, + })); + } } - _ => return Err(ConvertError::UnsupportedLineStringForm), - }) + } + Ok(None) + } + + /// Call [`LineProgram::begin_sequence`] for the converted program. + pub fn begin_sequence(&mut self, address: Option
) { + self.program.begin_sequence(address); + } + + /// Call [`LineProgram::end_sequence`] for the converted program. + pub fn end_sequence(&mut self, address_offset: u64) { + self.program.end_sequence(address_offset); + } + + /// Return [`LineProgram::in_sequence`] for the converted program. + pub fn in_sequence(&self) -> bool { + self.program.in_sequence() + } + + /// Set the next row and call [`LineProgram::generate_row`] for the converted program. + pub fn generate_row(&mut self, row: LineRow) { + *self.program.row() = row; + self.program.generate_row(); + } + + /// Return the program and a mapping from source file index to `FileId`. + pub fn program(self) -> (LineProgram, Vec) { + (self.program, self.files) } } } diff --git a/src/write/unit.rs b/src/write/unit.rs index c7f54e96f..261277dd5 100644 --- a/src/write/unit.rs +++ b/src/write/unit.rs @@ -2923,10 +2923,10 @@ mod tests { }; let convert_path = get_convert_path(constants::DW_AT_decl_file); - assert_eq!(convert_path, &file_string1); + assert_eq!(convert_dwarf.get_line_string(convert_path), file_bytes1); let convert_path = get_convert_path(constants::DW_AT_call_file); - assert_eq!(convert_path, &file_string2); + assert_eq!(convert_dwarf.get_line_string(convert_path), file_bytes2); } } }