Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Experimental: Donut selector - prototype of :in() pseudo-class selector #228

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions soupsieve/css_match.py
Original file line number Diff line number Diff line change
Expand Up @@ -968,6 +968,45 @@ def match_empty(self, el):
break
return is_empty

def match_in_scope(self, el, scoped_selectors):
    """
    Decide whether `el` sits inside the region described by an `:in()` pair.

    `scoped_selectors` is a two item sequence: the upper bound selectors
    followed by the lower bound selectors.

    The result is `True` only when `el` itself or one of its ancestors
    matches the upper bound and, if a lower bound was given, none of the
    ancestors strictly above `el` up to and including that upper bound
    node matches the lower bound. `el` itself is never tested against the
    lower bound. An empty upper bound never matches.
    """

    upper, lower = scoped_selectors

    # Without an upper bound there is nothing to be "in".
    if not len(upper):
        return False

    # Climb from `el` (inclusive) to the nearest node matching the upper
    # bound, counting how many parent hops were required to reach it.
    hops = 0
    node = el
    while node is not None:
        if self.match_selectors(node, upper):
            break
        hops += 1
        node = self.get_parent(node)
    else:
        # Ran off the top of the tree without finding an upper bound.
        return False

    # No lower bound: everything beneath the upper bound is in scope.
    if not len(lower):
        return True

    # Re-walk the same stretch of ancestors, starting at the parent of `el`
    # and stopping after the upper bound node, looking for a lower bound
    # that would place `el` outside of the region.
    node = self.get_parent(el)
    for _ in range(hops):
        if node is None:
            break
        if self.match_selectors(node, lower):
            return False
        node = self.get_parent(node)

    return True

def match_subselectors(self, el, selectors):
"""Match selectors."""

Expand Down Expand Up @@ -1397,6 +1436,9 @@ def match_selectors(self, el, selectors):
# Verify relationship selectors
if selector.relation and not self.match_relations(el, selector.relation):
continue
# Verify scoped `:in()` selector.
if selector.scoped_in and not self.match_in_scope(el, selector.scoped_in):
continue
# Validate that the current default selector match corresponds to the first submit button in the form
if selector.flags & ct.SEL_DEFAULT and not self.match_default(el):
continue
Expand Down
87 changes: 81 additions & 6 deletions soupsieve/css_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
':-soup-contains-own',
':has',
':is',
':in',
':matches',
':not',
':where'
Expand Down Expand Up @@ -133,6 +134,8 @@
PAT_PSEUDO_CLASS_CUSTOM = r'(?P<name>:(?=--){ident})'.format(ident=IDENTIFIER)
# Closing pseudo group (`)`)
PAT_PSEUDO_CLOSE = r'{ws}*\)'.format(ws=WSC)
# Slash option
PAT_PSEUDO_SLASH = r'{ws}*/(?!\*)'.format(ws=WSC)
# Pseudo element (`::pseudo-element`)
PAT_PSEUDO_ELEMENT = r':{}'.format(PAT_PSEUDO_CLASS)
# At rule (`@page`, etc.) (not supported)
Expand Down Expand Up @@ -197,6 +200,7 @@
FLG_OUT_OF_RANGE = 0x100
FLG_PLACEHOLDER_SHOWN = 0x200
FLG_FORGIVE = 0x400
FLG_IN = 0x800

# Maximum cached patterns to store
_MAXCACHE = 500
Expand Down Expand Up @@ -368,6 +372,7 @@ def __init__(self, **kwargs):
self.nth = kwargs.get('nth', [])
self.selectors = kwargs.get('selectors', [])
self.relations = kwargs.get('relations', [])
self.scoped_in = kwargs.get('scoped_in', None)
self.rel_type = kwargs.get('rel_type', None)
self.contains = kwargs.get('contains', [])
self.lang = kwargs.get('lang', [])
Expand Down Expand Up @@ -398,6 +403,7 @@ def freeze(self):
tuple(self.nth),
tuple(self.selectors),
self._freeze_relations(self.relations),
self.scoped_in,
self.rel_type,
tuple(self.contains),
tuple(self.lang),
Expand All @@ -409,10 +415,10 @@ def __str__(self): # pragma: no cover

return (
'_Selector(tag={!r}, ids={!r}, classes={!r}, attributes={!r}, nth={!r}, selectors={!r}, '
'relations={!r}, rel_type={!r}, contains={!r}, lang={!r}, flags={!r}, no_match={!r})'
'relations={!r}, scoped_in={!r}, rel_type={!r}, contains={!r}, lang={!r}, flags={!r}, no_match={!r})'
).format(
self.tag, self.ids, self.classes, self.attributes, self.nth, self.selectors,
self.relations, self.rel_type, self.contains, self.lang, self.flags, self.no_match
self.relations, self.scoped_in, self.rel_type, self.contains, self.lang, self.flags, self.no_match
)

__repr__ = __str__
Expand All @@ -422,6 +428,7 @@ class CSSParser(object):
"""Parse CSS selectors."""

css_tokens = (
SelectorPattern("pseudo_slash", PAT_PSEUDO_SLASH),
SelectorPattern("pseudo_close", PAT_PSEUDO_CLOSE),
SpecialPseudoPattern(
(
Expand Down Expand Up @@ -718,10 +725,13 @@ def parse_pseudo_open(self, sel, name, has_selector, iselector, index):
flags |= FLG_NOT
elif name == ':has':
flags |= FLG_RELATIVE | FLG_FORGIVE
elif name == ':in':
flags |= FLG_IN | FLG_FORGIVE
elif name in (':where', ':is'):
flags |= FLG_FORGIVE

sel.selectors.append(self.parse_selectors(iselector, index, flags))

has_selector = True

return has_selector
Expand Down Expand Up @@ -869,11 +879,37 @@ def parse_pseudo_dir(self, sel, m, has_selector):
has_selector = True
return has_selector

def parse_pseudo_in(self, sel, m, has_selector, selectors, relations, index):
    """
    Finish the selector in progress for `:in()` and start a new one.

    The selector being built (`sel`) is appended to `selectors`, the pending
    `relations` list is cleared in place, and a fresh `_Selector` is handed
    back to the caller.

    - `sel`: the selector currently being assembled.
    - `m`: the regular expression match for the token (unused here; callers
      pass `None` when there is no token).
    - `has_selector`: whether `sel` has received any content.
    - `selectors`: list that completed selectors are appended to (mutated).
    - `relations`: pending relations; moved onto `sel` when it has content,
      and always emptied on success (mutated).
    - `index`: position reported in the syntax error.

    Returns a tuple of `(has_selector, sel, rel_type)` where `has_selector`
    is always `True`, `sel` is a new empty `_Selector`, and `rel_type` is the
    descendant (whitespace) combinator prefixed with `:`.

    Raises `SelectorSyntaxError` when nothing was parsed and the empty slot
    is not one of the tolerated forms.
    """

    if has_selector:
        # End selector
        sel.relations.extend(relations)
        del relations[:]
        selectors.append(sel)
    elif not selectors or (relations and relations[-1].rel_type is None):
        # Allow empty set or empty slot.
        # This branch previously re-tested `has_selector` and so could never
        # run after the `if` above; it is meant for the case where nothing
        # was parsed, in which case the slot simply matches nothing.
        sel.no_match = True
        del relations[:]
        selectors.append(sel)
    else:
        raise SelectorSyntaxError(
            'Expected a selector at position {}'.format(index),
            self.pattern,
            index
        )

    rel_type = ':' + WS_COMBINATOR
    sel = _Selector()
    has_selector = True
    return has_selector, sel, rel_type

def parse_selectors(self, iselector, index=0, flags=0):
"""Parse selectors."""

# Initialize important variables
sel = _Selector()
in_sel = None
selectors = []
has_selector = False
closed = False
Expand All @@ -892,6 +928,7 @@ def parse_selectors(self, iselector, index=0, flags=0):
is_out_of_range = bool(flags & FLG_OUT_OF_RANGE)
is_placeholder_shown = bool(flags & FLG_PLACEHOLDER_SHOWN)
is_forgive = bool(flags & FLG_FORGIVE)
is_in = bool(flags & FLG_IN)

# Print out useful debug stuff
if self.debug: # pragma: no cover
Expand All @@ -917,11 +954,14 @@ def parse_selectors(self, iselector, index=0, flags=0):
print(' is_placeholder_shown: True')
if is_forgive:
print(' is_forgive: True')
if is_in:
print(' is_in: True')

# The algorithm for relative selectors requires an initial selector in the selector list
if is_relative:
selectors.append(_Selector())

# Parse a given selector piece to completion
try:
while True:
key, m = next(iselector)
Expand All @@ -945,6 +985,18 @@ def parse_selectors(self, iselector, index=0, flags=0):
has_selector = self.parse_pseudo_dir(sel, m, has_selector)
# Currently only supports HTML
is_html = True
elif key == 'pseudo_slash':
if not is_in or not is_open or in_sel is not None:
raise SelectorSyntaxError(
"Unexpected slash at postion {}".format(m.start(0)),
self.pattern,
m.start(0)
)
has_selector, sel, rel_type = self.parse_pseudo_in(
sel, m, has_selector, selectors, relations, index
)
in_sel = selectors
selectors = []
elif key == 'pseudo_close':
if not has_selector:
if not is_forgive:
Expand Down Expand Up @@ -1005,6 +1057,10 @@ def parse_selectors(self, iselector, index=0, flags=0):
if is_relative:
sel.rel_type = rel_type
selectors[-1].relations.append(sel)
elif is_in:
has_selector, sel, rel_type = self.parse_pseudo_in(
sel, None, has_selector, selectors, relations, index
)
else:
sel.relations.extend(relations)
del relations[:]
Expand All @@ -1022,10 +1078,16 @@ def parse_selectors(self, iselector, index=0, flags=0):
else:
# Handle normal pseudo-classes with empty slots
if not selectors or not relations:
# Others like `:is()` etc.
sel.no_match = True
del relations[:]
selectors.append(sel)
if is_in:
# Special handling for `:in()`
has_selector, sel, rel_type = self.parse_pseudo_in(
sel, None, has_selector, selectors, relations, index
)
else:
# Others like `:is()` etc.
sel.no_match = True
del relations[:]
selectors.append(sel)
has_selector = True

if not has_selector:
Expand All @@ -1051,6 +1113,19 @@ def parse_selectors(self, iselector, index=0, flags=0):
if is_placeholder_shown:
selectors[-1].flags = ct.SEL_PLACEHOLDER_SHOWN

# Special formatting of `:in()`
# We create a tuple of two selector lists: upper bound and lower bound
if is_in:
if in_sel is None:
in_sel = selectors
selectors = []
s = _Selector()
s.scoped_in = (
ct.SelectorList([s.freeze() for s in in_sel], False, False),
ct.SelectorList([s.freeze() for s in selectors], False, False)
)
selectors = [s]

# Return selector list
return ct.SelectorList([s.freeze() for s in selectors], is_not, is_html)

Expand Down
5 changes: 3 additions & 2 deletions soupsieve/css_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,12 +177,12 @@ class Selector(Immutable):

__slots__ = (
'tag', 'ids', 'classes', 'attributes', 'nth', 'selectors',
'relation', 'rel_type', 'contains', 'lang', 'flags', '_hash'
'relation', 'scoped_in', 'rel_type', 'contains', 'lang', 'flags', '_hash'
)

def __init__(
self, tag, ids, classes, attributes, nth, selectors,
relation, rel_type, contains, lang, flags
relation, scoped_in, rel_type, contains, lang, flags
):
"""Initialize."""

Expand All @@ -194,6 +194,7 @@ def __init__(
nth=nth,
selectors=selectors,
relation=relation,
scoped_in=scoped_in,
rel_type=rel_type,
contains=contains,
lang=lang,
Expand Down