Skip to content

Commit

Permalink
♻️ Refactor stringprep table generation
Browse files Browse the repository at this point in the history
Tables are now generated into one table per file.  By using autoload,
this avoids needing to load *all* of the tables to access only one.

Also, PROHIBIT regexps combining all of the prohibited tables have been
compiled for the "SASLprep", "nameprep", and "trace" profiles
(previously, only "SASLprep" had its own combined PROHIBIT regexp).
  • Loading branch information
nevans committed Dec 23, 2024
1 parent 009033f commit 71b50d8
Show file tree
Hide file tree
Showing 49 changed files with 1,101 additions and 603 deletions.
2 changes: 1 addition & 1 deletion lib/net/imap/sasl/anonymous_authenticator.rb
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ class AnonymousAuthenticator
# Any other keyword arguments are silently ignored.
def initialize(anon_msg = nil, anonymous_message: nil, **)
message = (anonymous_message || anon_msg || "").to_str
@anonymous_message = StringPrep::Trace.stringprep_trace message
@anonymous_message = IMAP::StringPrep::Trace.stringprep_trace message
if (size = @anonymous_message&.length)&.> 255
raise ArgumentError,
"anonymous_message is too long. (%d codepoints)" % [size]
Expand Down
29 changes: 15 additions & 14 deletions lib/net/imap/stringprep.rb
Original file line number Diff line number Diff line change
Expand Up @@ -73,34 +73,35 @@ def self.[](table)
# The above steps MUST be performed in the order given to comply with
# this specification.
#
def stringprep(string,
maps:,
normalization:,
prohibited:,
**opts)
# Runs the stringprep steps on a UTF-8 copy of +string+ and returns that copy:
# mapping (+maps+), Unicode normalization (+normalization+), and finally the
# prohibited-output check (+prohibited+).  Each of the three keyword arguments
# may be +nil+ to skip its step.  Any remaining keyword arguments (+opts+) are
# forwarded unchanged to check_prohibited!.
def stringprep(string, maps:, normalization:, prohibited:, **opts)
string = string.encode("UTF-8") # also dups (and raises invalid encoding)
# NOTE(review): this map_tables! call and the map! call on the next line both
# appear in this diff view; this looks like the pre-change line that map!
# replaces -- confirm that only one of them is meant to run.
map_tables!(string, *maps) if maps
# Mapping is skipped entirely when +maps+ is nil or empty.
map!(string, maps) if maps&.any?
string.unicode_normalize!(normalization) if normalization
check_prohibited!(string, *prohibited, **opts) if prohibited
string
end

def map_tables!(string, *tables)
tables.each do |table|
regexp, replacements = Tables::MAPPINGS.fetch(table)
string.gsub!(regexp, replacements)
# Applies every entry of +mappings+ to +string+ in place, in order, and
# returns +string+.
#
# An entry that is a String is treated as a table name and looked up in
# Tables::MAPPINGS (raising KeyError when unknown).  Any other entry is
# splatted directly into String#gsub!, e.g. a <tt>[regexp, replacement]</tt>
# pair.  Because a Hash enumerates as key/value pairs, a
# <tt>{regexp => replacement}</tt> Hash is accepted as well.
def map!(string, mappings)
  mappings.each do |entry|
    gsub_args = String === entry ? Tables::MAPPINGS.fetch(entry) : entry
    string.gsub!(*gsub_args)
  end
  string
end

# Deprecated: use map! instead.
#
# Looks up each name in +tables+ in Tables::MAPPINGS (raising KeyError for an
# unknown name), applies the resulting mappings to +string+ in place via map!,
# and returns +string+.  A deprecation warning is written on every call.
def map_tables!(string, *tables)
  warn "map_tables! is deprecated. Use map! instead."
  # The block must name its parameter: without |table| the lookup referenced
  # an undefined local and raised NameError on every call.
  map!(string, tables.map { |table| Tables::MAPPINGS.fetch(table) })
end

# Checks +string+ for any codepoint in +tables+. Raises a
# ProhibitedCodepoint describing the first matching table.
#
# Also checks bidirectional characters, when <tt>bidi: true</tt>, which may
# raise a BidiStringError.
# Also checks bidirectional characters, when <tt>bidi: true</tt>, which
# may raise a BidiStringError.
#
# +profile+ is an optional string which will be added to any exception that
# is raised (it does not affect behavior).
# +profile+ is an optional string which will be added to any exception
# that is raised (it does not affect behavior).
def check_prohibited!(string,
*tables,
bidi: false,
Expand Down
124 changes: 95 additions & 29 deletions lib/net/imap/stringprep/saslprep.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,60 +4,126 @@ module Net
class IMAP
module StringPrep

# SASLprep#saslprep can be used to prepare a string according to [RFC4013].
# SASLprep#saslprep can be used to prepare a string according to
# RFC4013[https://tools.ietf.org/html/rfc4013].
#
# \SASLprep maps characters three ways: to nothing, to space, and Unicode
# normalization form KC. \SASLprep prohibits codepoints from nearly all
# standard StringPrep tables (RFC3454, Appendix "C"), and uses
# standard StringPrep tables
# (RFC3454[https://tools.ietf.org/html/rfc3454], Appendix "C"), and uses
# \StringPrep's standard bidirectional characters requirements (Appendix
# "D"). \SASLprep also uses \StringPrep's definition of "Unassigned"
# codepoints (Appendix "A").
module SASLprep
# Avoid loading these tables unless they are needed (for non-ASCII).
autoload :PROHIBITED_OUTPUT, "#{__dir__}/saslprep/prohibited.rb"
autoload :PROHIBITED_OUTPUT_STORED, "#{__dir__}/saslprep/prohibited.rb"
autoload :PROHIBITED, "#{__dir__}/saslprep/prohibited.rb"
autoload :PROHIBITED_STORED, "#{__dir__}/saslprep/prohibited.rb"

# Defined in RFC4013[https://tools.ietf.org/html/rfc4013].
STRINGPREP_PROFILE = "SASLprep"

# Used to short-circuit strings that don't need preparation.
ASCII_NO_CTRLS = /\A[\x20-\x7e]*\z/u.freeze

# Avoid loading these tables unless they are needed (they are only
# needed for non-ASCII).
saslprep_tables = File.expand_path("saslprep_tables", __dir__)
autoload :MAP_TO_NOTHING, saslprep_tables
autoload :MAP_TO_SPACE, saslprep_tables
autoload :PROHIBITED, saslprep_tables
autoload :PROHIBITED_STORED, saslprep_tables
autoload :TABLES_PROHIBITED, saslprep_tables
autoload :TABLES_PROHIBITED_STORED, saslprep_tables
# Regexp for RFC4013[https://tools.ietf.org/html/rfc4013] §2.1 Mapping -
# mapped to space
MAP_TO_SPACE = Tables::IN_C_1_2

# Regexp for RFC4013[https://tools.ietf.org/html/rfc4013] §2.1 Mapping -
# mapped to nothing
MAP_TO_NOTHING = Tables::IN_B_1

# RFC4013[https://tools.ietf.org/html/rfc4013] §2.1 Mapping
# >>>
# This profile specifies:
# - non-ASCII space characters (\StringPrep\[\"C.1.2\"]) that can
# be mapped to SPACE (U+0020)
# - the "commonly mapped to nothing" characters
# (\StringPrep\[\"B.1\"]) that can be mapped to nothing.
MAPPINGS = {
MAP_TO_SPACE => " ",
MAP_TO_NOTHING => "",
}.freeze

# RFC4013[https://tools.ietf.org/html/rfc4013] §2.2 Normalization
# >>>
# This profile specifies using Unicode normalization form KC, as
# described in Section 4 of [StringPrep].
NORMALIZATION = :nfkc

# RFC4013[https://tools.ietf.org/html/rfc4013] §2.3 Prohibited Output
# >>>
# * Non-ASCII space characters — \StringPrep\[\"C.1.2\"]
# * ASCII control characters — \StringPrep\[\"C.2.1\"]
# * Non-ASCII control characters — \StringPrep\[\"C.2.2\"]
# * Private Use characters — \StringPrep\[\"C.3\"]
# * Non-character code points — \StringPrep\[\"C.4\"]
# * Surrogate code points — \StringPrep\[\"C.5\"]
# * Inappropriate for plain text characters — \StringPrep\[\"C.6\"]
# * Inappropriate for canonical representation characters — \StringPrep\[\"C.7\"]
# * Change display properties or deprecated characters — \StringPrep\[\"C.8\"]
# * Tagging characters — \StringPrep\[\"C.9\"]
PROHIBITED_TABLES = %w[C.1.2 C.2.1 C.2.2 C.3 C.4 C.5 C.6 C.7 C.8 C.9]
.freeze

# RFC4013[https://tools.ietf.org/html/rfc4013] §2.4 Bidirectional
# Characters
# >>>
# This profile specifies checking bidirectional strings as described
# in [StringPrep, Section 6].
CHECK_BIDI = true

# RFC4013[https://tools.ietf.org/html/rfc4013] §2.5 Unassigned Code
# Points
# >>>
# This profile specifies the \StringPrep\[\"A.1\"] table as its
# list of unassigned code points.
UNASSIGNED_TABLE = "A.1"

# :nodoc:
UNASSIGNED = Tables::IN_A_1
deprecate_constant :UNASSIGNED

module_function

# Prepares a UTF-8 +string+ for comparison, using the \SASLprep profile
# RFC4013 of the StringPrep algorithm RFC3454.
# {[RFC4013]}[https://tools.ietf.org/html/rfc4013] of the StringPrep
# algorithm {[RFC3454]}[https://tools.ietf.org/html/rfc3454].
#
# By default, prohibited strings will return +nil+. When +exception+ is
# +true+, a StringPrepError describing the violation will be raised.
#
# When +stored+ is +true+, "unassigned" codepoints will be prohibited.
# For \StringPrep and the \SASLprep profile, "unassigned" refers to
# Unicode 3.2, and not later versions. See RFC3454 §7 for more
# Unicode 3.2, and not later versions. See RFC3454[https://tools.ietf.org/html/rfc3454] §7 for more
# information.
def saslprep(str, stored: false, exception: false)
return str if ASCII_NO_CTRLS.match?(str) # incompatible encoding raises
str = str.encode("UTF-8") # also dups (and raises for invalid encoding)
str.gsub!(MAP_TO_SPACE, " ")
str.gsub!(MAP_TO_NOTHING, "")
str.unicode_normalize!(:nfkc)
# These regexps combine the prohibited and bidirectional checks
return str unless str.match?(stored ? PROHIBITED_STORED : PROHIBITED)
return nil unless exception
# raise helpful errors to indicate *why* it failed:
tables = stored ? TABLES_PROHIBITED_STORED : TABLES_PROHIBITED
StringPrep.check_prohibited! str, *tables, bidi: true, profile: "SASLprep"
raise InvalidStringError.new(
"unknown error", string: string, profile: "SASLprep"
)
# Prepares +original+ with the SASLprep profile and returns the prepared
# string.  Strings made only of printable ASCII are returned as-is.
#
# With <tt>exception: false</tt> (the default) a prohibited or invalidly
# encoded string yields +nil+.  With <tt>exception: true</tt> the work is
# delegated to StringPrep.stringprep so that the error it raises describes
# the violation; invalid or incompatible encodings are re-raised as a
# StringPrepError carrying the input string and the profile name.
#
# When +stored+ is true, the PROHIBITED_STORED pattern is used instead of
# PROHIBITED (and +stored+ is forwarded to StringPrep.stringprep).
def saslprep(original, stored: false, exception: false)
  return original if ASCII_NO_CTRLS.match?(original) # incompatible encoding raises
  if exception
    StringPrep.stringprep(
      original,
      unassigned: UNASSIGNED_TABLE,
      maps: MAPPINGS,
      prohibited: PROHIBITED_TABLES,
      normalization: NORMALIZATION,
      bidi: CHECK_BIDI,
      stored: stored,
      profile: STRINGPREP_PROFILE,
    )
  else
    str = original.encode("UTF-8") # also dups (and raises for invalid encoding)
    str.gsub!(MAP_TO_SPACE, " ")
    str.gsub!(MAP_TO_NOTHING, "")
    str.unicode_normalize!(:nfkc)
    str unless str.match?(stored ? PROHIBITED_STORED : PROHIBITED)
  end
rescue ArgumentError, Encoding::CompatibilityError => ex
  if /invalid byte sequence|incompatible encoding/.match? ex.message
    return nil unless exception
    # Report the caller's input: the local +str+ is only assigned on the
    # non-exception path, so it is always nil by the time we get here.
    raise StringPrepError.new(ex.message, string: original,
                              profile: STRINGPREP_PROFILE)
  end
  raise ex
end
Expand Down
22 changes: 22 additions & 0 deletions lib/net/imap/stringprep/saslprep/prohibited.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# frozen_string_literal: true

module Net::IMAP::StringPrep

# Combined "prohibited" patterns for the SASLprep profile.  Each constant
# below is either an alias of a Tables pattern or a Regexp.union of one with
# Tables::BIDI_FAILURE.
module SASLprep

# Alias of Tables::SASLPREP_PROHIBIT.
# :nodoc:
PROHIBITED_OUTPUT = Tables::SASLPREP_PROHIBIT

# Alias of Tables::SASLPREP_PROHIBIT_STORED.
# :nodoc:
PROHIBITED_OUTPUT_STORED = Tables::SASLPREP_PROHIBIT_STORED

# PROHIBITED_OUTPUT unioned with Tables::BIDI_FAILURE, so one match covers
# both the prohibited-output and the bidirectional checks.
# :nodoc:
PROHIBITED = Regexp.union(PROHIBITED_OUTPUT, Tables::BIDI_FAILURE)

# Same as PROHIBITED, but built from PROHIBITED_OUTPUT_STORED.
# :nodoc:
PROHIBITED_STORED = Regexp.union(
PROHIBITED_OUTPUT_STORED, Tables::BIDI_FAILURE,
)

end
end
171 changes: 36 additions & 135 deletions lib/net/imap/stringprep/tables.rb

Large diffs are not rendered by default.

13 changes: 13 additions & 0 deletions lib/net/imap/stringprep/tables/bidi_desc_req2.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# frozen_string_literal: true

#--
# This file is generated by `rake stringprep:tables`. Don't edit directly.
#++

module Net::IMAP::StringPrep
module Tables

# Human-readable statement of the bidirectional-string rule that forbids
# mixing RandALCat and LCat characters.
# NOTE(review): the name suggests this is requirement 2 of RFC 3454 §6 and
# that the text is used in error messages -- confirm against the callers.
BIDI_DESC_REQ2 = "A string with RandALCat characters must not contain LCat characters."

end
end
13 changes: 13 additions & 0 deletions lib/net/imap/stringprep/tables/bidi_desc_req3.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# frozen_string_literal: true

#--
# This file is generated by `rake stringprep:tables`. Don't edit directly.
#++

module Net::IMAP::StringPrep
module Tables

# Human-readable statement of the bidirectional-string rule that a string
# containing RandALCat characters must both start and end with one.
# NOTE(review): the name suggests this is requirement 3 of RFC 3454 §6 and
# that the text is used in error messages -- confirm against the callers.
BIDI_DESC_REQ3 = "A string with RandALCat characters must start and end with RandALCat characters."

end
end
Loading

0 comments on commit 71b50d8

Please sign in to comment.