python2.7
regularexpression

sre_parse.parse keyword

sre_parse.parse returns a tree describing the automaton for a regular expression.
Since there is no documentation for it, these notes were taken.
(python 2.7)

import sre_parse
tree=sre_parse.parse(Regex)
print(tree)
  • LITERAL
('literal', <char code> )
>>> sre_parse.parse(r'a')
[('literal', 97)]
  • NOT_LITERAL 
('not_literal', <char code> )
>>> sre_parse.parse(r'[^a]')
[('not_literal', 97)]
  • ANY
('any', None)
>>> sre_parse.parse(r'.')
[('any', None)]
  • AT
('at', <type of position> )
>>> sre_parse.parse(r'^1')
[('at', 'at_beginning'), ('literal', 49)]
  • IN
('in', [ ] )
>>> sre_parse.parse(r'[abc]')
[('in', [('literal', 97), ('literal', 98), ('literal', 99)])]
  • NEGATE
('negate', None)
>>> sre_parse.parse(r'[^1234]')
[('in', [('negate', None), ('literal', 49), ('literal', 50), ('literal', 51), ('literal', 52)])]
  • SUBPATTERN (note: Python 3.6+ uses a different format: (<match number>, <add flags>, <del flags>, []))
('subpattern',( <match number>, [] ))
 >>> sre_parse.parse(r'(ab)c')
[('subpattern', (1, [('literal', 97), ('literal', 98)])), ('literal', 99)]
  • RANGE
 ('range',( <start char code>, <end char code> ))
>>> sre_parse.parse(r'[1-9]')
[('in', [('range', (49, 57))])]
  • MAX_REPEAT/MIN_REPEAT
('max_repeat', (<min repeat number>, <max repeat number>, [] ))
>>> sre_parse.parse(r'a{5,8}')
[('max_repeat', (5, 8, [('literal', 97)]))]
  • BRANCH
('branch', (None, [ [], [] ] ))
>>> sre_parse.parse(r'abc|def')
[('branch', (None, [[('literal', 97), ('literal', 98), ('literal', 99)], [('literal', 100), ('literal', 101), ('literal', 102)]]))]
  • CATEGORY
('category', <category type> )
>>> sre_parse.parse(r'\d')
[('in', [('category', 'category_digit')])]
  • GROUPREF
('groupref', <group reference number> )
>>> sre_parse.parse(r'(?P<abc>12)3(?P=abc)')
[('subpattern', (1, [('literal', 49), ('literal', 50)])), ('literal', 51), ('groupref', 1)]
  • ASSERT_NOT
('assert_not', (<direction: 1 = lookahead, -1 = lookbehind>, []))
>>> sre_parse.parse(r'1(?!12)')
[('literal', 49), ('assert_not', (1, [('literal', 49), ('literal', 50)]))]