- re.search
import re
pattern = 'this'
text = 'Does this text match the pattern?'
match = re.search(pattern, text)
s = match.start()
e = match.end()
print('Found "{}"\nin "{}"\nfrom {} to {} ("{}")'.format(
match.re.pattern, match.string, s, e, text[s:e]))
- re.compile
import re
# Precompile the patterns
regexes = [
re.compile(p)
for p in ['this', 'that']
]
text = 'Does this text match the pattern?'
print('Text: {!r}\n'.format(text))
for regex in regexes:
print('Seeking "{}" ->'.format(regex.pattern),
end=' ')
if regex.search(text):
print('match!')
else:
print('no match')
- re.findall -> 返回列表
- re pattern
import re
def test_patterns(text, patterns):
"""Given source text and a list of patterns, look for
matches for each pattern within the text and print
them to stdout.
"""
# Look for each pattern in the text and print the results
for pattern, desc in patterns:
print("'{}' ({})\n".format(pattern, desc))
print(" '{}'".format(text))
for match in re.finditer(pattern, text):
s = match.start()
e = match.end()
substr = text[s:e]
n_backslashes = text[:s].count('\\')
prefix = '.' * (s + n_backslashes)
print(" {}'{}'".format(prefix, substr))
print()
return
test_patterns(
'abbaabbba',
[('ab*', 'a followed by zero or more b'),
('ab+', 'a followed by one or more b'),
('ab?', 'a followed by zero or one b'),
('ab{3}', 'a followed by three b'),
('ab{2,3}', 'a followed by two to three b')],
)
test_patterns(
'abbaabbba',
[('[ab]', 'either a or b'),
('a[ab]+', 'a followed by 1 or more a or b'),
('a[ab]+?', 'a followed by 1 or more a or b, not greedy')],
)
test_patterns(
'abbaabbba',
[(r'a((a+)|(b+))', 'capturing form'),
(r'a((?:a+)|(?:b+))', 'noncapturing')],
)
- unicode
import re
text = u'Français złoty Österreich'
pattern = r'\w+'
ascii_pattern = re.compile(pattern, re.ASCII)
unicode_pattern = re.compile(pattern)
print('Text :', text)
print('Pattern :', pattern)
print('ASCII :', list(ascii_pattern.findall(text)))
print('Unicode :', list(unicode_pattern.findall(text)))
- Verbose Expression Syntax
import re
address = re.compile('[\w\d.+-]+@([\w\d.]+\.)+(com|org|edu)')
candidates = [
u'first.last@example.com',
u'first.last+category@gmail.com',
u'valid-address@mail.example.com',
u'not-valid@example.foo',
]
for candidate in candidates:
match = address.search(candidate)
print('{:<30} {}'.format(
candidate, 'Matches' if match else 'No match')
)
- re.sub ()
import re
bold = re.compile(r'\*{2}(?P<bold_text>.*?)\*{2}')
text = 'Make this **bold**. This **too**.'
print('Text:', text)
print('Bold:', bold.sub(r'<b>\g<bold_text></b>', text))
- re.split
import re
text = '''Paragraph one
on two lines.
Paragraph two.
Paragraph three.'''
print('With findall:')
for num, para in enumerate(re.findall(r'(.+?)(\n{2,}|$)',
text,
flags=re.DOTALL)):
print(num, repr(para))
print()
print()
print('With split:')
for num, para in enumerate(re.split(r'\n{2,}', text)):
print(num, repr(para))
print()
text = '''Paragraph one
on two lines.
Paragraph two.
Paragraph three.'''
print('With split:')
for num, para in enumerate(re.split(r'(\n{2,})', text)):
print(num, repr(para))
print()