diff --git a/Cargo.toml b/Cargo.toml index 25b0fd6a..7304fd75 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -14,7 +14,8 @@ exclude = ["fonts/*"] [dev-dependencies] hex = "0.4.3" sha2 = "0.10.8" -ttf-parser = "0.20.0" + [dependencies] pico-args = "0.5.0" +ttf-parser = {path = "../ttf-parser"} diff --git a/run.sh b/run.sh index ee7a6f96..00efb2f9 100755 --- a/run.sh +++ b/run.sh @@ -1,5 +1,5 @@ FONT="fonts/NotoSansCJKsc-Regular.otf" -GIDS="161" +GIDS="0-5000" fonttools subset $FONT --drop-tables=GSUB,GPOS,GDEF,FFTM,vhea,vmtx,DSIG,VORG,cmap,hdmx \ --gids=$GIDS --glyph-names --output-file=out_ft.otf \ diff --git a/src/cff/charstring.rs b/src/cff/charstring.rs index e877fbca..f11f4ac1 100644 --- a/src/cff/charstring.rs +++ b/src/cff/charstring.rs @@ -10,9 +10,8 @@ use crate::{Error, Result}; use std::cell::RefCell; use std::collections::BTreeSet; use std::fmt::{Debug, Formatter}; -use std::rc::Rc; -type SharedCharString<'a> = RefCell>; +pub type SharedCharString<'a> = RefCell>; #[derive(Clone, Copy)] pub struct Fixed<'a>(i32, &'a [u8]); @@ -288,14 +287,18 @@ impl<'a> CharString<'a> { .pop() .and_then(|n| n.as_i32()) .ok_or(MalformedFont)?; - let gsubr_index = - conv_subroutine_index(biased_index, decompiler.gsubrs_bias) - .ok_or(MalformedFont)?; + let gsubr_index = unapply_bias(biased_index, decompiler.gsubrs_bias) + .ok_or(MalformedFont)?; let gsubr = decompiler .gsubrs .get(gsubr_index as usize) .ok_or(MalformedFont)?; gsubr.borrow_mut().decompile(decompiler)?; + println!( + "index: {:?}, len: {:?}", + gsubr_index, + gsubr.borrow().program.len() + ); self.used_gsubs.insert(gsubr_index); // Make sure used lsubs and gsubs are propagated transitively. self.used_lsubs.extend(&gsubr.borrow().used_lsubs); @@ -311,9 +314,8 @@ impl<'a> CharString<'a> { .pop() .and_then(|n| n.as_i32()) .ok_or(MalformedFont)?; - let lsubr_index = - conv_subroutine_index(biased_index, decompiler.lsubrs_bias) - .ok_or(MalformedFont)?; + let lsubr_index = unapply_bias(biased_index, decompiler.lsubrs_bias) + .ok_or(MalformedFont)?; let lsubr = decompiler .lsubrs .get(lsubr_index as usize) @@ -365,9 +367,15 @@ pub fn calc_subroutine_bias(len: u32) -> u16 { } } -fn conv_subroutine_index(index: i32, bias: u16) -> Option { +pub fn unapply_bias(index: i32, bias: u16) -> Option { let bias = i32::from(bias); let index = index.checked_add(bias)?; u32::try_from(index).ok() } + +pub fn apply_bias(index: i32, bias: u16) -> Option { + let bias = i32::from(bias); + + index.checked_sub(bias) +} diff --git a/src/cff/mod.rs b/src/cff/mod.rs index 50807bb2..f48d4cb8 100644 --- a/src/cff/mod.rs +++ b/src/cff/mod.rs @@ -10,24 +10,30 @@ mod private_dict; mod top_dict; // mod subset; mod charstring; +mod remapper; use super::*; use crate::cff::charset::{parse_charset, Charset}; -use crate::cff::charstring::{CharString, Decompiler}; +use crate::cff::charstring::{ + apply_bias, calc_subroutine_bias, CharString, Decompiler, Instruction, Program, + SharedCharString, +}; use crate::cff::dict::{DictionaryParser, Number}; use crate::cff::encoding::Encoding; use crate::cff::index::{parse_index, skip_index, Index, OffsetSize}; +use crate::cff::operator::CALL_GLOBAL_SUBROUTINE; use crate::cff::private_dict::parse_subr_offset; -use crate::cff::top_dict::parse_top_dict; +use crate::cff::remapper::SidRemapper; use crate::cff::top_dict::top_dict_operator::{ BASE_FONT_BLEND, BASE_FONT_NAME, COPYRIGHT, FAMILY_NAME, FONT_NAME, FULL_NAME, NOTICE, POSTSCRIPT, ROS, VERSION, WEIGHT, }; use crate::stream::{StringId, U24}; use crate::util::LazyArray16; +use remapper::Remapper; use std::array; use std::cell::RefCell; -use std::collections::{BTreeMap, BTreeSet, HashMap}; +use std::collections::BTreeSet; use std::hash::Hash; use std::ops::{Add, Range}; use top_dict::{top_dict_operator, TopDictData}; @@ -35,6 +41,7 @@ use top_dict::{top_dict_operator, TopDictData}; // Limits according to the Adobe Technical Note #5176, chapter 4 DICT Data. const MAX_OPERANDS_LEN: usize = 48; const MAX_ARGUMENTS_STACK_LEN: usize = 513; +const CUSTOM_SID: u16 = 392; /// A [Compact Font Format Table]( /// https://docs.microsoft.com/en-us/typography/opentype/spec/cff). @@ -53,55 +60,6 @@ pub struct Table<'a> { kind: Option>, } -#[derive(Debug, Clone)] -pub struct Remapper { - counter: T, - forward: BTreeMap, -} - -impl + Add + Default + Copy> Remapper { - pub fn new() -> Self { - Remapper::new_with_count(T::default()) - } - - pub fn new_with_count(count: T) -> Self { - let mut mapper = Self { counter: count, forward: BTreeMap::new() }; - mapper - } - - pub fn get(&self, old: T) -> Option { - self.forward.get(&old).copied() - } - - pub fn remap(&mut self, old: T) -> T { - *self.forward.entry(old).or_insert_with(|| { - let value = self.counter; - self.counter = self.counter + T::from(1); - value - }) - } - - // Add a method to return the iterator - pub fn iter(&self) -> RemapperIterator { - RemapperIterator { inner_iter: self.forward.iter() } - } -} - -impl<'a, T> Iterator for RemapperIterator<'a, T> -where - T: Ord + PartialEq + From + Add + Default + Copy, -{ - type Item = (&'a T, &'a T); - - fn next(&mut self) -> Option { - self.inner_iter.next() - } -} - -struct RemapperIterator<'a, T> { - inner_iter: std::collections::btree_map::Iter<'a, T, T>, -} - #[derive(Default)] struct FontWriteContext<'a> { // TOP DICT DATA @@ -169,6 +127,12 @@ pub fn subset<'a>(ctx: &mut Context<'a>) { assert_eq!(compiled, raw_charstring); } + let gsubr_bias = calc_subroutine_bias(gsubr_remapper.len()); + let lsubrs_bias = lsubr_remapper + .iter() + .map(|r| calc_subroutine_bias(r.len())) + .collect::>(); + let mut font_write_context = FontWriteContext::default(); let mut subsetted_font = vec![]; @@ -185,33 +149,78 @@ pub fn subset<'a>(ctx: &mut Context<'a>) { ); // STRINGS w.extend(&write_sids(&sid_remapper, table.strings).unwrap()); + // GSUBRS + w.extend(&write_gsubrs(&gsubr_remapper, gsubr_bias, &gsubrs).unwrap()); subsetted_font = w.finish(); + font_write_context.char_strings_offset = Number::from_i32(1); } + ttf_parser::cff::Table::parse(&subsetted_font); } -fn write_sids(sid_remapper: &Remapper, strings: Index) -> Result> { +fn write_gsubrs( + gsubr_remapper: &Remapper, + gsubr_bias: u16, + gsubrs: &[SharedCharString], +) -> Result> { + let mut new_gsubrs = vec![]; + + for (new, old) in gsubr_remapper.sorted().iter().enumerate() { + let new = new as u32; + let mut new_program = Program::default(); + let program = &gsubrs.get(*old as usize).unwrap().borrow().program; + + let mut iter = program.instructions().iter().peekable(); + + while let Some(instruction) = iter.next() { + match instruction { + Instruction::HintMask(mask) => { + new_program.push(Instruction::HintMask(*mask)) + } + Instruction::Operand(num) => { + if let Some(Instruction::SingleByteOperator(op)) = iter.peek() { + if *op == CALL_GLOBAL_SUBROUTINE { + let new_gsubr = apply_bias(new as i32, gsubr_bias).unwrap(); + new_program + .push(Instruction::Operand(Number::from_i32(new_gsubr))); + continue; + } + } + + new_program.push(Instruction::Operand(num.clone())) + } + // TODO: What if two gsubr/lsubr next to each other> + Instruction::DoubleByteOperator(op) => { + new_program.push(Instruction::DoubleByteOperator(*op)) + } + Instruction::SingleByteOperator(op) => { + new_program.push(Instruction::SingleByteOperator(*op)) + } + } + } + + let mut w = Writer::new(); + new_program.compile(&mut w); + new_gsubrs.push(w.finish()); + } + + create_index(new_gsubrs) +} + +fn write_sids(sid_remapper: &SidRemapper, strings: Index) -> Result> { let mut new_strings = vec![]; - for (_, old) in sid_remapper.iter() { + for (_, old) in sid_remapper.sorted().iter().enumerate() { new_strings .push(strings.get(old.checked_sub(391).unwrap() as u32).unwrap().to_vec()); } - println!( - "{:?}", - new_strings - .iter() - .map(|s| std::str::from_utf8(s).unwrap()) - .collect::>() - ); - create_index(new_strings) } fn write_top_dict( raw_top_dict: &[u8], font_write_context: &mut FontWriteContext, - sid_remapper: &Remapper, + sid_remapper: &SidRemapper, ) -> Result> { use top_dict_operator::*; @@ -303,6 +312,11 @@ fn create_index(data: Vec>) -> Result> { // + 1 Since we start counting from the preceding byte. let offsize = data.iter().map(|v| v.len() as u32).sum::() + 1; + // Empty Index only contains the count field + if count == 0 { + return Ok(vec![0, 0]); + } + let offset_size = if offsize <= u8::MAX as u32 { OffsetSize::Size1 } else if offsize <= u16::MAX as u32 { @@ -315,10 +329,13 @@ fn create_index(data: Vec>) -> Result> { let mut w = Writer::new(); w.write(count); + w.write(offset_size as u8); - let mut cur_offset: u32 = 1; + let mut cur_offset: u32 = 0; let mut write_offset = |len| { + cur_offset += len; + match offset_size { OffsetSize::Size1 => { let num = u8::try_from(cur_offset).map_err(|_| MalformedFont)?; @@ -335,7 +352,6 @@ fn create_index(data: Vec>) -> Result> { OffsetSize::Size4 => w.write(cur_offset), } - cur_offset += len as u32; Ok(()) }; @@ -351,10 +367,10 @@ fn create_index(data: Vec>) -> Result> { Ok(w.finish()) } -fn get_sid_remapper(ctx: &Context, used_sids: &BTreeSet) -> Remapper { +fn get_sid_remapper(ctx: &Context, used_sids: &BTreeSet) -> SidRemapper { // SIDs can appear in the top dict and charset // There are 391 standard strings, so we need to start from 392 - let mut sid_remapper = Remapper::new_with_count(392); + let mut sid_remapper = SidRemapper::new(); for sid in used_sids { sid_remapper.remap(sid.0); } diff --git a/src/cff/remapper.rs b/src/cff/remapper.rs new file mode 100644 index 00000000..ea5400eb --- /dev/null +++ b/src/cff/remapper.rs @@ -0,0 +1,76 @@ +use crate::cff::CUSTOM_SID; +use crate::stream::StringId; +use std::collections::BTreeMap; +use std::ops::Add; + +#[derive(Debug, Clone)] +pub struct Remapper { + counter: T, + forward: BTreeMap, + backward: Vec, +} + +impl + Add + Default + Copy> Remapper { + pub fn new() -> Self { + Remapper::new_with_count(T::default()) + } + + fn new_with_count(count: T) -> Self { + let mut mapper = Self { + counter: count, + forward: BTreeMap::new(), + backward: Vec::new(), + }; + mapper + } + + pub fn get(&self, old: T) -> Option { + self.forward.get(&old).copied() + } + + pub fn remap(&mut self, old: T) -> T { + *self.forward.entry(old).or_insert_with(|| { + let value = self.counter; + self.backward.push(old); + self.counter = self.counter + T::from(1); + value + }) + } + + pub fn len(&self) -> u32 { + self.forward.len() as u32 + } + + // Add a method to return the iterator + pub fn sorted(&self) -> &[T] { + self.backward.as_ref() + } +} + +pub struct SidRemapper(Remapper); + +impl SidRemapper { + pub fn new() -> Self { + SidRemapper(Remapper::new_with_count(0)) + } + + pub fn get(&self, old: u16) -> Option { + if old < CUSTOM_SID { + return Some(old); + } else { + self.0.get(old) + } + } + + pub fn remap(&mut self, old: u16) -> u16 { + if old < CUSTOM_SID { + return CUSTOM_SID; + } else { + self.0.remap(old) + } + } + + pub fn sorted(&self) -> &[u16] { + self.0.sorted() + } +} diff --git a/src/main.rs b/src/main.rs index 70ec87cc..06ca9b95 100644 --- a/src/main.rs +++ b/src/main.rs @@ -28,7 +28,7 @@ fn main() { let args: Vec = env::args().collect(); // Read the raw font data. let data = std::fs::read(&args[1]).unwrap(); - let gids = parse_gids(&args.get(3).to_owned().unwrap_or(&"0-200".to_owned())); + let gids = parse_gids(&args.get(3).to_owned().unwrap_or(&"0-20".to_owned())); let mapper = GidMapper::from_gid_set(&gids); let sub = subset(&data, 0, &mapper).unwrap();