Skip to content

Commit

Permalink
Merge pull request #241 from amomchilov/object_id
Browse files Browse the repository at this point in the history
Remove calls to `Object#object_id`
  • Loading branch information
boazsegev authored Nov 10, 2024
2 parents fe11372 + c11a7af commit 8108fb6
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 28 deletions.
18 changes: 9 additions & 9 deletions lib/combine_pdf/parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -369,16 +369,16 @@ def _parse_
# the following was discarded because some PDF files didn't have an EOL marker as required
# str = @scanner.scan_until(/(\r\n|\r|\n)endstream/)
# instead, a non-strict RegExp is used:


# raise error if the stream doesn't end.
unless @scanner.skip_until(/endstream/)
raise ParsingError, "Parsing Error: PDF file error - a stream object wasn't properly closed using 'endstream'!"
end
length = @scanner.pos - (old_pos + 9)
length = 0 if(length < 0)
length -= 1 if(@scanner.string[old_pos + length - 1] == "\n")
length -= 1 if(@scanner.string[old_pos + length - 1] == "\r")
length -= 1 if(@scanner.string[old_pos + length - 1] == "\n")
length -= 1 if(@scanner.string[old_pos + length - 1] == "\r")
str = (length > 0) ? @scanner.string.slice(old_pos, length) : ''

# warn "CombinePDF parser: detected Stream #{str.length} bytes long #{str[0..3]}...#{str[-4..-1]}"
Expand Down Expand Up @@ -632,17 +632,17 @@ def catalog_pages(catalogs = nil, inheritance_hash = {})
#
def serialize_objects_and_references
obj_dir = {}
objid_cache = {}
objid_cache = {}.compare_by_identity
# create a dictionary for referenced objects (no value resolution at this point)
# at the same time, delete duplicates and old versions when objects have multiple versions
@parsed.uniq!
@parsed.length.times do |i|
o = @parsed[i]
objid_cache[o.object_id] = i
objid_cache[o] = i
tmp_key = [o[:indirect_reference_id], o[:indirect_generation_number]]
if tmp_found = obj_dir[tmp_key]
tmp_found.clear
@parsed[objid_cache[tmp_found.object_id]] = nil
@parsed[objid_cache[tmp_found]] = nil
end
obj_dir[tmp_key] = o
end
Expand Down Expand Up @@ -765,9 +765,9 @@ def unify_string(str)
# end

# # run block of code on every PDF object (PDF objects are class Hash)
# def each_object(object, limit_references = true, already_visited = {}, &block)
# def each_object(object, limit_references = true, already_visited = {}.compare_by_identity, &block)
# unless limit_references
# already_visited[object.object_id] = true
# already_visited[object] = true
# end
# case
# when object.is_a?(Array)
Expand All @@ -776,7 +776,7 @@ def unify_string(str)
# yield(object)
# unless limit_references && object[:is_reference_only]
# object.each do |k,v|
# each_object(v, limit_references, already_visited, &block) unless already_visited[v.object_id]
# each_object(v, limit_references, already_visited, &block) unless already_visited[v]
# end
# end
# end
Expand Down
38 changes: 19 additions & 19 deletions lib/combine_pdf/pdf_protected.rb
Original file line number Diff line number Diff line change
Expand Up @@ -21,19 +21,19 @@ class PDF
# this is used for internal operations, such as injecting data using the << operator.
def add_referenced()
# an existing object map
resolved = {}.dup
existing = {}.dup
should_resolve = [].dup
resolved = {}.compare_by_identity
existing = {}
should_resolve = []
#set all existing objects as resolved and register their children for future resolution
@objects.each { |obj| existing[obj] = obj ; resolved[obj.object_id] = obj; should_resolve << obj.values}
@objects.each { |obj| existing[obj] = obj ; resolved[obj] = obj; should_resolve << obj.values}
# loop until should_resolve is empty
while should_resolve.any?
obj = should_resolve.pop
next if resolved[obj.object_id] # the object exists
next if resolved[obj] # the object exists
if obj.is_a?(Hash)
referenced = obj[:referenced_object]
if referenced && referenced.any?
tmp = resolved[referenced.object_id]
tmp = resolved[referenced]
if !tmp && referenced[:raw_stream_content]
tmp = existing[referenced[:raw_stream_content]]
# Avoid endless recursion by limiting it to a number of layers (default == 2)
Expand All @@ -42,18 +42,18 @@ def add_referenced()
if tmp
obj[:referenced_object] = tmp
else
resolved[obj.object_id] = referenced
resolved[obj] = referenced
# existing[referenced] = referenced
existing[referenced[:raw_stream_content]] = referenced
should_resolve << referenced
@objects << referenced
end
else
resolved[obj.object_id] = obj
obj.keys.each { |k| should_resolve << obj[k] unless !obj[k].is_a?(Enumerable) || resolved[obj[k].object_id] }
resolved[obj] = obj
obj.keys.each { |k| should_resolve << obj[k] unless !obj[k].is_a?(Enumerable) || resolved[obj[k]] }
end
elsif obj.is_a?(Array)
resolved[obj.object_id] = obj
resolved[obj] = obj
should_resolve.concat obj
end
end
Expand All @@ -78,14 +78,14 @@ def rebuild_catalog(*with_pages)
page_list.concat(with_pages) unless with_pages.empty?

# duplicate any non-unique pages - This is a special case to resolve Adobe Acrobat Reader issues (see issues #19 and #81)
uniqueness = {}.dup
page_list.each { |page| page = page[:referenced_object] || page; page = page.dup if uniqueness[page.object_id]; uniqueness[page.object_id] = page }
uniqueness = {}.compare_by_identity
page_list.each { |page| page = page[:referenced_object] || page; page = page.dup if uniqueness[page]; uniqueness[page] = page }
page_list.clear
page_list = uniqueness.values
uniqueness.clear

# build new Pages object
page_object_kids = [].dup
page_object_kids = []
pages_object = { Type: :Pages, Count: page_list.length, Kids: page_object_kids }
pages_object_reference = { referenced_object: pages_object, is_reference_only: true }
page_list.each { |pg| pg[:Parent] = pages_object_reference; page_object_kids << ({ referenced_object: pg, is_reference_only: true }) }
Expand Down Expand Up @@ -192,11 +192,11 @@ def rebuild_names(name_tree = nil, base = 'CombinePDF_0000000')
dic = []
# map a names tree and return a valid name tree. Do not recurse.
should_resolve = [name_tree[:Kids], name_tree[:Names]]
resolved = [].to_set
resolved = Set.new.compare_by_identity
while should_resolve.any?
pos = should_resolve.pop
if pos.is_a? Array
next if resolved.include?(pos.object_id)
next if resolved.include?(pos)
if pos[0].is_a? String
(pos.length / 2).times do |i|
dic << (pos[i * 2].clear << base.next!)
Expand All @@ -209,16 +209,16 @@ def rebuild_names(name_tree = nil, base = 'CombinePDF_0000000')
end
elsif pos.is_a? Hash
pos = pos[:referenced_object] || pos
next if resolved.include?(pos.object_id)
next if resolved.include?(pos)
should_resolve << pos[:Kids] if pos[:Kids]
should_resolve << pos[:Names] if pos[:Names]
end
resolved << pos.object_id
resolved << pos
end
return { referenced_object: { Names: dic }, is_reference_only: true }
end
@names ||= @names[:referenced_object]
new_names = { Type: :Names }.dup
new_names = { Type: :Names }
POSSIBLE_NAME_TREES.each do |ntree|
if @names[ntree]
new_names[ntree] = rebuild_names(@names[ntree], base)
Expand Down Expand Up @@ -373,7 +373,7 @@ def print_outline_to_file(outline, file)
private

def equal_layers obj1, obj2, layer = CombinePDF.eq_depth_limit
return true if obj1.object_id == obj2.object_id
return true if obj1.equal?(obj2)
if obj1.is_a? Hash
return false unless obj2.is_a? Hash
return false unless obj1.length == obj2.length
Expand Down

0 comments on commit 8108fb6

Please sign in to comment.