Skip to content

Commit

Permalink
prototype of :in() pseudo-class selector
Browse files Browse the repository at this point in the history
  • Loading branch information
facelessuser committed Sep 11, 2021
1 parent 2abff64 commit 1801bf6
Show file tree
Hide file tree
Showing 3 changed files with 123 additions and 8 deletions.
39 changes: 39 additions & 0 deletions soupsieve/css_match.py
Original file line number Diff line number Diff line change
Expand Up @@ -968,6 +968,42 @@ def match_empty(self, el):
break
return is_empty

def match_in_scope(self, el, scoped_selectors):
    """
    Match scoped "in" selectors.

    `scoped_selectors` is a pair `(inclusive, exclusive)` of selector lists.

    The element is in scope when it, or one of its ancestors, matches
    `inclusive` (the upper bound) and none of the ancestors from the
    element's parent up to and including that upper bound matches
    `exclusive` (the lower bound). The element itself is never tested
    against `exclusive`.

    Returns `True` when `el` is in scope, `False` otherwise. An empty
    `inclusive` list never matches.
    """

    inclusive, exclusive = scoped_selectors

    # Without an upper bound selector nothing can be in scope.
    if not len(inclusive):
        return False

    # Find upper bound: walk from the element itself towards the root and
    # remember how many parent hops were needed to reach the first match.
    depth = 0
    current = el
    while current is not None and not self.match_selectors(current, inclusive):
        depth += 1
        current = self.get_parent(current)

    # Ran off the top of the tree without ever matching the upper bound.
    if current is None:
        return False

    # Find lower bound relative to upper bound (if lower exists): re-walk the
    # same ancestors (parent of `el` through the upper bound, inclusive) and
    # reject if any of them matches the excluding selectors.
    if len(exclusive):
        current = el
        for _ in range(depth):
            current = self.get_parent(current)
            if current is None:
                break
            if self.match_selectors(current, exclusive):
                return False

    return True

def match_subselectors(self, el, selectors):
"""Match selectors."""

Expand Down Expand Up @@ -1397,6 +1433,9 @@ def match_selectors(self, el, selectors):
# Verify relationship selectors
if selector.relation and not self.match_relations(el, selector.relation):
continue
# Verify scoped `:in()` selector.
if selector.scoped_in and not self.match_in_scope(el, selector.scoped_in):
continue
# Validate that the current default selector match corresponds to the first submit button in the form
if selector.flags & ct.SEL_DEFAULT and not self.match_default(el):
continue
Expand Down
87 changes: 81 additions & 6 deletions soupsieve/css_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
':-soup-contains-own',
':has',
':is',
':in',
':matches',
':not',
':where'
Expand Down Expand Up @@ -133,6 +134,8 @@
PAT_PSEUDO_CLASS_CUSTOM = r'(?P<name>:(?=--){ident})'.format(ident=IDENTIFIER)
# Closing pseudo group (`)`)
PAT_PSEUDO_CLOSE = r'{ws}*\)'.format(ws=WSC)
# Slash option
PAT_PSEUDO_SLASH = r'{ws}*/(?!\*)'.format(ws=WSC)
# Pseudo element (`::pseudo-element`)
PAT_PSEUDO_ELEMENT = r':{}'.format(PAT_PSEUDO_CLASS)
# At rule (`@page`, etc.) (not supported)
Expand Down Expand Up @@ -197,6 +200,7 @@
FLG_OUT_OF_RANGE = 0x100
FLG_PLACEHOLDER_SHOWN = 0x200
FLG_FORGIVE = 0x400
FLG_IN = 0x800

# Maximum cached patterns to store
_MAXCACHE = 500
Expand Down Expand Up @@ -368,6 +372,7 @@ def __init__(self, **kwargs):
self.nth = kwargs.get('nth', [])
self.selectors = kwargs.get('selectors', [])
self.relations = kwargs.get('relations', [])
self.scoped_in = kwargs.get('scoped_in', None)
self.rel_type = kwargs.get('rel_type', None)
self.contains = kwargs.get('contains', [])
self.lang = kwargs.get('lang', [])
Expand Down Expand Up @@ -398,6 +403,7 @@ def freeze(self):
tuple(self.nth),
tuple(self.selectors),
self._freeze_relations(self.relations),
self.scoped_in,
self.rel_type,
tuple(self.contains),
tuple(self.lang),
Expand All @@ -409,10 +415,10 @@ def __str__(self): # pragma: no cover

return (
'_Selector(tag={!r}, ids={!r}, classes={!r}, attributes={!r}, nth={!r}, selectors={!r}, '
'relations={!r}, rel_type={!r}, contains={!r}, lang={!r}, flags={!r}, no_match={!r})'
'relations={!r}, scoped_in={!r}, rel_type={!r}, contains={!r}, lang={!r}, flags={!r}, no_match={!r})'
).format(
self.tag, self.ids, self.classes, self.attributes, self.nth, self.selectors,
self.relations, self.rel_type, self.contains, self.lang, self.flags, self.no_match
self.relations, self.scoped_in, self.rel_type, self.contains, self.lang, self.flags, self.no_match
)

__repr__ = __str__
Expand All @@ -422,6 +428,7 @@ class CSSParser(object):
"""Parse CSS selectors."""

css_tokens = (
SelectorPattern("pseudo_slash", PAT_PSEUDO_SLASH),
SelectorPattern("pseudo_close", PAT_PSEUDO_CLOSE),
SpecialPseudoPattern(
(
Expand Down Expand Up @@ -718,10 +725,13 @@ def parse_pseudo_open(self, sel, name, has_selector, iselector, index):
flags |= FLG_NOT
elif name == ':has':
flags |= FLG_RELATIVE | FLG_FORGIVE
elif name == ':in':
flags |= FLG_IN | FLG_FORGIVE
elif name in (':where', ':is'):
flags |= FLG_FORGIVE

sel.selectors.append(self.parse_selectors(iselector, index, flags))

has_selector = True

return has_selector
Expand Down Expand Up @@ -869,11 +879,37 @@ def parse_pseudo_dir(self, sel, m, has_selector):
has_selector = True
return has_selector

def parse_pseudo_in(self, sel, m, has_selector, selectors, relations, index):
    """
    Parse pseudo in.

    Finalizes the selector currently being built inside `:in()` and appends
    it to `selectors`, then hands back a fresh selector to continue with.

    Parameters:
    - `sel`: the selector under construction; it is appended to `selectors`.
    - `m`: the regex match that triggered the call. It is not used here and
      some call sites pass `None`.
    - `has_selector`: whether any selector part has been captured for `sel`.
    - `selectors`: the list being accumulated; mutated in place.
    - `relations`: pending relations; moved onto `sel` (or discarded for an
      empty slot) and cleared in place.
    - `index`: pattern position reported in the syntax error.

    Returns a tuple `(has_selector, sel, rel_type)` where `has_selector` is
    always `True`, `sel` is a new `_Selector`, and `rel_type` is
    `':' + WS_COMBINATOR`.

    Raises `SelectorSyntaxError` when nothing was captured and the empty slot
    cannot be forgiven.
    """

    if has_selector:
        # End selector
        sel.relations.extend(relations)
        del relations[:]
        selectors.append(sel)
    elif not selectors or (relations and relations[-1].rel_type is None):
        # Allow empty set or empty slot.
        # This branch is only reached when no selector part was captured. It was
        # previously guarded by `has_selector`, which made it unreachable and
        # turned every forgivable empty slot into a syntax error.
        sel.no_match = True
        del relations[:]
        selectors.append(sel)
    else:
        raise SelectorSyntaxError(
            'Expected a selector at position {}'.format(index),
            self.pattern,
            index
        )

    rel_type = ':' + WS_COMBINATOR
    sel = _Selector()
    has_selector = True
    return has_selector, sel, rel_type

def parse_selectors(self, iselector, index=0, flags=0):
"""Parse selectors."""

# Initialize important variables
sel = _Selector()
in_sel = None
selectors = []
has_selector = False
closed = False
Expand All @@ -892,6 +928,7 @@ def parse_selectors(self, iselector, index=0, flags=0):
is_out_of_range = bool(flags & FLG_OUT_OF_RANGE)
is_placeholder_shown = bool(flags & FLG_PLACEHOLDER_SHOWN)
is_forgive = bool(flags & FLG_FORGIVE)
is_in = bool(flags & FLG_IN)

# Print out useful debug stuff
if self.debug: # pragma: no cover
Expand All @@ -917,11 +954,14 @@ def parse_selectors(self, iselector, index=0, flags=0):
print(' is_placeholder_shown: True')
if is_forgive:
print(' is_forgive: True')
if is_in:
print(' is_in: True')

# The algorithm for relative selectors requires an initial selector in the selector list
if is_relative:
selectors.append(_Selector())

# Parse a given selector piece to completion
try:
while True:
key, m = next(iselector)
Expand All @@ -945,6 +985,18 @@ def parse_selectors(self, iselector, index=0, flags=0):
has_selector = self.parse_pseudo_dir(sel, m, has_selector)
# Currently only supports HTML
is_html = True
elif key == 'pseudo_slash':
if not is_in or not is_open or in_sel is not None:
raise SelectorSyntaxError(
"Unexpected slash at postion {}".format(m.start(0)),
self.pattern,
m.start(0)
)
has_selector, sel, rel_type = self.parse_pseudo_in(
sel, m, has_selector, selectors, relations, index
)
in_sel = selectors
selectors = []
elif key == 'pseudo_close':
if not has_selector:
if not is_forgive:
Expand Down Expand Up @@ -1005,6 +1057,10 @@ def parse_selectors(self, iselector, index=0, flags=0):
if is_relative:
sel.rel_type = rel_type
selectors[-1].relations.append(sel)
elif is_in:
has_selector, sel, rel_type = self.parse_pseudo_in(
sel, None, has_selector, selectors, relations, index
)
else:
sel.relations.extend(relations)
del relations[:]
Expand All @@ -1022,10 +1078,16 @@ def parse_selectors(self, iselector, index=0, flags=0):
else:
# Handle normal pseudo-classes with empty slots
if not selectors or not relations:
# Others like `:is()` etc.
sel.no_match = True
del relations[:]
selectors.append(sel)
if is_in:
# Special handling for `:in()`
has_selector, sel, rel_type = self.parse_pseudo_in(
sel, None, has_selector, selectors, relations, index
)
else:
# Others like `:is()` etc.
sel.no_match = True
del relations[:]
selectors.append(sel)
has_selector = True

if not has_selector:
Expand All @@ -1051,6 +1113,19 @@ def parse_selectors(self, iselector, index=0, flags=0):
if is_placeholder_shown:
selectors[-1].flags = ct.SEL_PLACEHOLDER_SHOWN

# Special formatting of `:in()`
# We create a tuple of two selector lists: upper bound and lower bound
if is_in:
if in_sel is None:
in_sel = selectors
selectors = []
s = _Selector()
s.scoped_in = (
ct.SelectorList([s.freeze() for s in in_sel], False, False),
ct.SelectorList([s.freeze() for s in selectors], False, False)
)
selectors = [s]

# Return selector list
return ct.SelectorList([s.freeze() for s in selectors], is_not, is_html)

Expand Down
5 changes: 3 additions & 2 deletions soupsieve/css_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,12 +177,12 @@ class Selector(Immutable):

__slots__ = (
'tag', 'ids', 'classes', 'attributes', 'nth', 'selectors',
'relation', 'rel_type', 'contains', 'lang', 'flags', '_hash'
'relation', 'scoped_in', 'rel_type', 'contains', 'lang', 'flags', '_hash'
)

def __init__(
self, tag, ids, classes, attributes, nth, selectors,
relation, rel_type, contains, lang, flags
relation, scoped_in, rel_type, contains, lang, flags
):
"""Initialize."""

Expand All @@ -194,6 +194,7 @@ def __init__(
nth=nth,
selectors=selectors,
relation=relation,
scoped_in=scoped_in,
rel_type=rel_type,
contains=contains,
lang=lang,
Expand Down

0 comments on commit 1801bf6

Please sign in to comment.