golden hour
/opt/alt/python37/lib64/python3.7
⬆️ Go Up
Upload
File/Folder
Size
Actions
__future__.py
4.98 KB
Del
OK
__phello__.foo.py
64 B
Del
OK
__pycache__
-
Del
OK
_bootlocale.py
1.76 KB
Del
OK
_collections_abc.py
25.8 KB
Del
OK
_compat_pickle.py
8.54 KB
Del
OK
_compression.py
5.21 KB
Del
OK
_dummy_thread.py
5.89 KB
Del
OK
_markupbase.py
14.26 KB
Del
OK
_osx_support.py
19.14 KB
Del
OK
_py_abc.py
6.04 KB
Del
OK
_pydecimal.py
223.33 KB
Del
OK
_pyio.py
89.47 KB
Del
OK
_sitebuiltins.py
3.04 KB
Del
OK
_strptime.py
24.91 KB
Del
OK
_sysconfigdata_dm_linux_x86_64-linux-gnu.py
26.36 KB
Del
OK
_sysconfigdata_m_linux_x86_64-linux-gnu.py
25 KB
Del
OK
_threading_local.py
7.04 KB
Del
OK
_weakrefset.py
5.55 KB
Del
OK
abc.py
5.45 KB
Del
OK
aifc.py
32.04 KB
Del
OK
antigravity.py
477 B
Del
OK
argparse.py
93.14 KB
Del
OK
ast.py
12.54 KB
Del
OK
asynchat.py
11.06 KB
Del
OK
asyncio
-
Del
OK
asyncore.py
19.65 KB
Del
OK
base64.py
19.92 KB
Del
OK
bdb.py
30.99 KB
Del
OK
binhex.py
13.63 KB
Del
OK
bisect.py
2.5 KB
Del
OK
bz2.py
12.12 KB
Del
OK
cProfile.py
6.11 KB
Del
OK
calendar.py
24.24 KB
Del
OK
cgi.py
34.23 KB
Del
OK
cgitb.py
11.74 KB
Del
OK
chunk.py
5.31 KB
Del
OK
cmd.py
14.51 KB
Del
OK
code.py
10.37 KB
Del
OK
codecs.py
35.76 KB
Del
OK
codeop.py
6.13 KB
Del
OK
collections
-
Del
OK
colorsys.py
3.97 KB
Del
OK
compileall.py
13.46 KB
Del
OK
concurrent
-
Del
OK
config-3.7m
-
Del
OK
configparser.py
53.01 KB
Del
OK
contextlib.py
24.18 KB
Del
OK
contextvars.py
129 B
Del
OK
copy.py
8.65 KB
Del
OK
copyreg.py
6.85 KB
Del
OK
crypt.py
3.27 KB
Del
OK
csv.py
15.8 KB
Del
OK
ctypes
-
Del
OK
curses
-
Del
OK
dataclasses.py
48.36 KB
Del
OK
datetime.py
84.52 KB
Del
OK
dbm
-
Del
OK
decimal.py
320 B
Del
OK
difflib.py
82.42 KB
Del
OK
dis.py
19.42 KB
Del
OK
distutils
-
Del
OK
doctest.py
102.11 KB
Del
OK
dummy_threading.py
2.75 KB
Del
OK
email
-
Del
OK
encodings
-
Del
OK
ensurepip
-
Del
OK
enum.py
34.22 KB
Del
OK
filecmp.py
9.6 KB
Del
OK
fileinput.py
14.28 KB
Del
OK
fnmatch.py
3.96 KB
Del
OK
formatter.py
14.79 KB
Del
OK
fractions.py
23.2 KB
Del
OK
ftplib.py
34.78 KB
Del
OK
functools.py
32.16 KB
Del
OK
genericpath.py
4.8 KB
Del
OK
getopt.py
7.31 KB
Del
OK
getpass.py
5.85 KB
Del
OK
gettext.py
21.45 KB
Del
OK
glob.py
5.51 KB
Del
OK
gzip.py
20.15 KB
Del
OK
hashlib.py
9.31 KB
Del
OK
heapq.py
22.48 KB
Del
OK
hmac.py
6.36 KB
Del
OK
html
-
Del
OK
http
-
Del
OK
idlelib
-
Del
OK
imaplib.py
52.04 KB
Del
OK
imghdr.py
3.71 KB
Del
OK
imp.py
10.29 KB
Del
OK
importlib
-
Del
OK
inspect.py
114.88 KB
Del
OK
io.py
3.43 KB
Del
OK
ipaddress.py
71.85 KB
Del
OK
json
-
Del
OK
keyword.py
2.2 KB
Del
OK
lib-dynload
-
Del
OK
lib2to3
-
Del
OK
linecache.py
5.21 KB
Del
OK
locale.py
76.36 KB
Del
OK
logging
-
Del
OK
lzma.py
12.68 KB
Del
OK
macpath.py
5.98 KB
Del
OK
mailbox.py
76.81 KB
Del
OK
mailcap.py
8.85 KB
Del
OK
mimetypes.py
20.99 KB
Del
OK
modulefinder.py
22.5 KB
Del
OK
multiprocessing
-
Del
OK
netrc.py
5.44 KB
Del
OK
nntplib.py
42.08 KB
Del
OK
ntpath.py
21.82 KB
Del
OK
nturl2path.py
2.52 KB
Del
OK
numbers.py
10 KB
Del
OK
opcode.py
5.69 KB
Del
OK
operator.py
10.61 KB
Del
OK
optparse.py
58.96 KB
Del
OK
os.py
37.01 KB
Del
OK
pathlib.py
49.15 KB
Del
OK
pdb.py
61.04 KB
Del
OK
pickle.py
56.63 KB
Del
OK
pickletools.py
89.08 KB
Del
OK
pipes.py
8.71 KB
Del
OK
pkgutil.py
20.96 KB
Del
OK
platform.py
45.89 KB
Del
OK
plistlib.py
29.99 KB
Del
OK
poplib.py
14.61 KB
Del
OK
posixpath.py
15.4 KB
Del
OK
pprint.py
20.39 KB
Del
OK
profile.py
21.97 KB
Del
OK
pstats.py
26.67 KB
Del
OK
pty.py
4.65 KB
Del
OK
py_compile.py
7.81 KB
Del
OK
pyclbr.py
14.78 KB
Del
OK
pydoc.py
103.39 KB
Del
OK
pydoc_data
-
Del
OK
queue.py
11.09 KB
Del
OK
quopri.py
7.09 KB
Del
OK
random.py
26.91 KB
Del
OK
re.py
14.95 KB
Del
OK
reprlib.py
5.14 KB
Del
OK
rlcompleter.py
6.93 KB
Del
OK
runpy.py
11.68 KB
Del
OK
sched.py
6.29 KB
Del
OK
secrets.py
1.99 KB
Del
OK
selectors.py
18.13 KB
Del
OK
shelve.py
8.33 KB
Del
OK
shlex.py
12.79 KB
Del
OK
shutil.py
40.97 KB
Del
OK
signal.py
2.07 KB
Del
OK
site-packages
-
Del
OK
site.py
21.07 KB
Del
OK
smtpd.py
33.91 KB
Del
OK
smtplib.py
43.4 KB
Del
OK
sndhdr.py
6.92 KB
Del
OK
socket.py
26.83 KB
Del
OK
socketserver.py
26.29 KB
Del
OK
sqlite3
-
Del
OK
sre_compile.py
26.24 KB
Del
OK
sre_constants.py
7.01 KB
Del
OK
sre_parse.py
38.24 KB
Del
OK
ssl.py
44.43 KB
Del
OK
stat.py
5.26 KB
Del
OK
statistics.py
20.17 KB
Del
OK
string.py
11.29 KB
Del
OK
stringprep.py
12.61 KB
Del
OK
struct.py
257 B
Del
OK
subprocess.py
70.95 KB
Del
OK
sunau.py
17.94 KB
Del
OK
symbol.py
2.09 KB
Del
OK
symtable.py
7.11 KB
Del
OK
sysconfig.py
23.87 KB
Del
OK
tabnanny.py
11.15 KB
Del
OK
tarfile.py
90.5 KB
Del
OK
telnetlib.py
22.59 KB
Del
OK
tempfile.py
26.1 KB
Del
OK
test
-
Del
OK
textwrap.py
18.95 KB
Del
OK
this.py
1003 B
Del
OK
threading.py
48.13 KB
Del
OK
timeit.py
13.18 KB
Del
OK
token.py
3.67 KB
Del
OK
tokenize.py
26.4 KB
Del
OK
trace.py
28.23 KB
Del
OK
traceback.py
22.89 KB
Del
OK
tracemalloc.py
16.68 KB
Del
OK
tty.py
879 B
Del
OK
types.py
9.67 KB
Del
OK
typing.py
55.12 KB
Del
OK
unittest
-
Del
OK
urllib
-
Del
OK
uu.py
7.11 KB
Del
OK
uuid.py
28.83 KB
Del
OK
venv
-
Del
OK
warnings.py
19.61 KB
Del
OK
wave.py
17.8 KB
Del
OK
weakref.py
21 KB
Del
OK
webbrowser.py
23.16 KB
Del
OK
wsgiref
-
Del
OK
xdrlib.py
5.77 KB
Del
OK
xml
-
Del
OK
xmlrpc
-
Del
OK
zipapp.py
7.36 KB
Del
OK
zipfile.py
79.19 KB
Del
OK
Edit: sre_parse.py
# # Secret Labs' Regular Expression Engine # # convert re-style regular expression to sre pattern # # Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved. # # See the sre.py file for information on usage and redistribution. # """Internal support module for sre""" # XXX: show string offset and offending character for all errors from sre_constants import * SPECIAL_CHARS = ".\\[{()*+?^$|" REPEAT_CHARS = "*+?{" DIGITS = frozenset("0123456789") OCTDIGITS = frozenset("01234567") HEXDIGITS = frozenset("0123456789abcdefABCDEF") ASCIILETTERS = frozenset("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ") WHITESPACE = frozenset(" \t\n\r\v\f") _REPEATCODES = frozenset({MIN_REPEAT, MAX_REPEAT}) _UNITCODES = frozenset({ANY, RANGE, IN, LITERAL, NOT_LITERAL, CATEGORY}) ESCAPES = { r"\a": (LITERAL, ord("\a")), r"\b": (LITERAL, ord("\b")), r"\f": (LITERAL, ord("\f")), r"\n": (LITERAL, ord("\n")), r"\r": (LITERAL, ord("\r")), r"\t": (LITERAL, ord("\t")), r"\v": (LITERAL, ord("\v")), r"\\": (LITERAL, ord("\\")) } CATEGORIES = { r"\A": (AT, AT_BEGINNING_STRING), # start of string r"\b": (AT, AT_BOUNDARY), r"\B": (AT, AT_NON_BOUNDARY), r"\d": (IN, [(CATEGORY, CATEGORY_DIGIT)]), r"\D": (IN, [(CATEGORY, CATEGORY_NOT_DIGIT)]), r"\s": (IN, [(CATEGORY, CATEGORY_SPACE)]), r"\S": (IN, [(CATEGORY, CATEGORY_NOT_SPACE)]), r"\w": (IN, [(CATEGORY, CATEGORY_WORD)]), r"\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]), r"\Z": (AT, AT_END_STRING), # end of string } FLAGS = { # standard flags "i": SRE_FLAG_IGNORECASE, "L": SRE_FLAG_LOCALE, "m": SRE_FLAG_MULTILINE, "s": SRE_FLAG_DOTALL, "x": SRE_FLAG_VERBOSE, # extensions "a": SRE_FLAG_ASCII, "t": SRE_FLAG_TEMPLATE, "u": SRE_FLAG_UNICODE, } TYPE_FLAGS = SRE_FLAG_ASCII | SRE_FLAG_LOCALE | SRE_FLAG_UNICODE GLOBAL_FLAGS = SRE_FLAG_DEBUG | SRE_FLAG_TEMPLATE class Verbose(Exception): pass class Pattern: # master pattern object. keeps track of global attributes def __init__(self): self.flags = 0 self.groupdict = {} self.groupwidths = [None] # group 0 self.lookbehindgroups = None @property def groups(self): return len(self.groupwidths) def opengroup(self, name=None): gid = self.groups self.groupwidths.append(None) if self.groups > MAXGROUPS: raise error("too many groups") if name is not None: ogid = self.groupdict.get(name, None) if ogid is not None: raise error("redefinition of group name %r as group %d; " "was group %d" % (name, gid, ogid)) self.groupdict[name] = gid return gid def closegroup(self, gid, p): self.groupwidths[gid] = p.getwidth() def checkgroup(self, gid): return gid < self.groups and self.groupwidths[gid] is not None def checklookbehindgroup(self, gid, source): if self.lookbehindgroups is not None: if not self.checkgroup(gid): raise source.error('cannot refer to an open group') if gid >= self.lookbehindgroups: raise source.error('cannot refer to group defined in the same ' 'lookbehind subpattern') class SubPattern: # a subpattern, in intermediate form def __init__(self, pattern, data=None): self.pattern = pattern if data is None: data = [] self.data = data self.width = None def dump(self, level=0): nl = True seqtypes = (tuple, list) for op, av in self.data: print(level*" " + str(op), end='') if op is IN: # member sublanguage print() for op, a in av: print((level+1)*" " + str(op), a) elif op is BRANCH: print() for i, a in enumerate(av[1]): if i: print(level*" " + "OR") a.dump(level+1) elif op is GROUPREF_EXISTS: condgroup, item_yes, item_no = av print('', condgroup) item_yes.dump(level+1) if item_no: print(level*" " + "ELSE") item_no.dump(level+1) elif isinstance(av, seqtypes): nl = False for a in av: if isinstance(a, SubPattern): if not nl: print() a.dump(level+1) nl = True else: if not nl: print(' ', end='') print(a, end='') nl = False if not nl: print() else: print('', av) def __repr__(self): return repr(self.data) def __len__(self): return len(self.data) def __delitem__(self, index): del self.data[index] def __getitem__(self, index): if isinstance(index, slice): return SubPattern(self.pattern, self.data[index]) return self.data[index] def __setitem__(self, index, code): self.data[index] = code def insert(self, index, code): self.data.insert(index, code) def append(self, code): self.data.append(code) def getwidth(self): # determine the width (min, max) for this subpattern if self.width is not None: return self.width lo = hi = 0 for op, av in self.data: if op is BRANCH: i = MAXREPEAT - 1 j = 0 for av in av[1]: l, h = av.getwidth() i = min(i, l) j = max(j, h) lo = lo + i hi = hi + j elif op is CALL: i, j = av.getwidth() lo = lo + i hi = hi + j elif op is SUBPATTERN: i, j = av[-1].getwidth() lo = lo + i hi = hi + j elif op in _REPEATCODES: i, j = av[2].getwidth() lo = lo + i * av[0] hi = hi + j * av[1] elif op in _UNITCODES: lo = lo + 1 hi = hi + 1 elif op is GROUPREF: i, j = self.pattern.groupwidths[av] lo = lo + i hi = hi + j elif op is GROUPREF_EXISTS: i, j = av[1].getwidth() if av[2] is not None: l, h = av[2].getwidth() i = min(i, l) j = max(j, h) else: i = 0 lo = lo + i hi = hi + j elif op is SUCCESS: break self.width = min(lo, MAXREPEAT - 1), min(hi, MAXREPEAT) return self.width class Tokenizer: def __init__(self, string): self.istext = isinstance(string, str) self.string = string if not self.istext: string = str(string, 'latin1') self.decoded_string = string self.index = 0 self.next = None self.__next() def __next(self): index = self.index try: char = self.decoded_string[index] except IndexError: self.next = None return if char == "\\": index += 1 try: char += self.decoded_string[index] except IndexError: raise error("bad escape (end of pattern)", self.string, len(self.string) - 1) from None self.index = index + 1 self.next = char def match(self, char): if char == self.next: self.__next() return True return False def get(self): this = self.next self.__next() return this def getwhile(self, n, charset): result = '' for _ in range(n): c = self.next if c not in charset: break result += c self.__next() return result def getuntil(self, terminator): result = '' while True: c = self.next self.__next() if c is None: if not result: raise self.error("missing group name") raise self.error("missing %s, unterminated name" % terminator, len(result)) if c == terminator: if not result: raise self.error("missing group name", 1) break result += c return result @property def pos(self): return self.index - len(self.next or '') def tell(self): return self.index - len(self.next or '') def seek(self, index): self.index = index self.__next() def error(self, msg, offset=0): return error(msg, self.string, self.tell() - offset) def _class_escape(source, escape): # handle escape code inside character class code = ESCAPES.get(escape) if code: return code code = CATEGORIES.get(escape) if code and code[0] is IN: return code try: c = escape[1:2] if c == "x": # hexadecimal escape (exactly two digits) escape += source.getwhile(2, HEXDIGITS) if len(escape) != 4: raise source.error("incomplete escape %s" % escape, len(escape)) return LITERAL, int(escape[2:], 16) elif c == "u" and source.istext: # unicode escape (exactly four digits) escape += source.getwhile(4, HEXDIGITS) if len(escape) != 6: raise source.error("incomplete escape %s" % escape, len(escape)) return LITERAL, int(escape[2:], 16) elif c == "U" and source.istext: # unicode escape (exactly eight digits) escape += source.getwhile(8, HEXDIGITS) if len(escape) != 10: raise source.error("incomplete escape %s" % escape, len(escape)) c = int(escape[2:], 16) chr(c) # raise ValueError for invalid code return LITERAL, c elif c in OCTDIGITS: # octal escape (up to three digits) escape += source.getwhile(2, OCTDIGITS) c = int(escape[1:], 8) if c > 0o377: raise source.error('octal escape value %s outside of ' 'range 0-0o377' % escape, len(escape)) return LITERAL, c elif c in DIGITS: raise ValueError if len(escape) == 2: if c in ASCIILETTERS: raise source.error('bad escape %s' % escape, len(escape)) return LITERAL, ord(escape[1]) except ValueError: pass raise source.error("bad escape %s" % escape, len(escape)) def _escape(source, escape, state): # handle escape code in expression code = CATEGORIES.get(escape) if code: return code code = ESCAPES.get(escape) if code: return code try: c = escape[1:2] if c == "x": # hexadecimal escape escape += source.getwhile(2, HEXDIGITS) if len(escape) != 4: raise source.error("incomplete escape %s" % escape, len(escape)) return LITERAL, int(escape[2:], 16) elif c == "u" and source.istext: # unicode escape (exactly four digits) escape += source.getwhile(4, HEXDIGITS) if len(escape) != 6: raise source.error("incomplete escape %s" % escape, len(escape)) return LITERAL, int(escape[2:], 16) elif c == "U" and source.istext: # unicode escape (exactly eight digits) escape += source.getwhile(8, HEXDIGITS) if len(escape) != 10: raise source.error("incomplete escape %s" % escape, len(escape)) c = int(escape[2:], 16) chr(c) # raise ValueError for invalid code return LITERAL, c elif c == "0": # octal escape escape += source.getwhile(2, OCTDIGITS) return LITERAL, int(escape[1:], 8) elif c in DIGITS: # octal escape *or* decimal group reference (sigh) if source.next in DIGITS: escape += source.get() if (escape[1] in OCTDIGITS and escape[2] in OCTDIGITS and source.next in OCTDIGITS): # got three octal digits; this is an octal escape escape += source.get() c = int(escape[1:], 8) if c > 0o377: raise source.error('octal escape value %s outside of ' 'range 0-0o377' % escape, len(escape)) return LITERAL, c # not an octal escape, so this is a group reference group = int(escape[1:]) if group < state.groups: if not state.checkgroup(group): raise source.error("cannot refer to an open group", len(escape)) state.checklookbehindgroup(group, source) return GROUPREF, group raise source.error("invalid group reference %d" % group, len(escape) - 1) if len(escape) == 2: if c in ASCIILETTERS: raise source.error("bad escape %s" % escape, len(escape)) return LITERAL, ord(escape[1]) except ValueError: pass raise source.error("bad escape %s" % escape, len(escape)) def _uniq(items): return list(dict.fromkeys(items)) def _parse_sub(source, state, verbose, nested): # parse an alternation: a|b|c items = [] itemsappend = items.append sourcematch = source.match start = source.tell() while True: itemsappend(_parse(source, state, verbose, nested + 1, not nested and not items)) if not sourcematch("|"): break if len(items) == 1: return items[0] subpattern = SubPattern(state) # check if all items share a common prefix while True: prefix = None for item in items: if not item: break if prefix is None: prefix = item[0] elif item[0] != prefix: break else: # all subitems start with a common "prefix". # move it out of the branch for item in items: del item[0] subpattern.append(prefix) continue # check next one break # check if the branch can be replaced by a character set set = [] for item in items: if len(item) != 1: break op, av = item[0] if op is LITERAL: set.append((op, av)) elif op is IN and av[0][0] is not NEGATE: set.extend(av) else: break else: # we can store this as a character set instead of a # branch (the compiler may optimize this even more) subpattern.append((IN, _uniq(set))) return subpattern subpattern.append((BRANCH, (None, items))) return subpattern def _parse(source, state, verbose, nested, first=False): # parse a simple pattern subpattern = SubPattern(state) # precompute constants into local variables subpatternappend = subpattern.append sourceget = source.get sourcematch = source.match _len = len _ord = ord while True: this = source.next if this is None: break # end of pattern if this in "|)": break # end of subpattern sourceget() if verbose: # skip whitespace and comments if this in WHITESPACE: continue if this == "#": while True: this = sourceget() if this is None or this == "\n": break continue if this[0] == "\\": code = _escape(source, this, state) subpatternappend(code) elif this not in SPECIAL_CHARS: subpatternappend((LITERAL, _ord(this))) elif this == "[": here = source.tell() - 1 # character set set = [] setappend = set.append ## if sourcematch(":"): ## pass # handle character classes if source.next == '[': import warnings warnings.warn( 'Possible nested set at position %d' % source.tell(), FutureWarning, stacklevel=nested + 6 ) negate = sourcematch("^") # check remaining characters while True: this = sourceget() if this is None: raise source.error("unterminated character set", source.tell() - here) if this == "]" and set: break elif this[0] == "\\": code1 = _class_escape(source, this) else: if set and this in '-&~|' and source.next == this: import warnings warnings.warn( 'Possible set %s at position %d' % ( 'difference' if this == '-' else 'intersection' if this == '&' else 'symmetric difference' if this == '~' else 'union', source.tell() - 1), FutureWarning, stacklevel=nested + 6 ) code1 = LITERAL, _ord(this) if sourcematch("-"): # potential range that = sourceget() if that is None: raise source.error("unterminated character set", source.tell() - here) if that == "]": if code1[0] is IN: code1 = code1[1][0] setappend(code1) setappend((LITERAL, _ord("-"))) break if that[0] == "\\": code2 = _class_escape(source, that) else: if that == '-': import warnings warnings.warn( 'Possible set difference at position %d' % ( source.tell() - 2), FutureWarning, stacklevel=nested + 6 ) code2 = LITERAL, _ord(that) if code1[0] != LITERAL or code2[0] != LITERAL: msg = "bad character range %s-%s" % (this, that) raise source.error(msg, len(this) + 1 + len(that)) lo = code1[1] hi = code2[1] if hi < lo: msg = "bad character range %s-%s" % (this, that) raise source.error(msg, len(this) + 1 + len(that)) setappend((RANGE, (lo, hi))) else: if code1[0] is IN: code1 = code1[1][0] setappend(code1) set = _uniq(set) # XXX: <fl> should move set optimization to compiler! if _len(set) == 1 and set[0][0] is LITERAL: # optimization if negate: subpatternappend((NOT_LITERAL, set[0][1])) else: subpatternappend(set[0]) else: if negate: set.insert(0, (NEGATE, None)) # charmap optimization can't be added here because # global flags still are not known subpatternappend((IN, set)) elif this in REPEAT_CHARS: # repeat previous item here = source.tell() if this == "?": min, max = 0, 1 elif this == "*": min, max = 0, MAXREPEAT elif this == "+": min, max = 1, MAXREPEAT elif this == "{": if source.next == "}": subpatternappend((LITERAL, _ord(this))) continue min, max = 0, MAXREPEAT lo = hi = "" while source.next in DIGITS: lo += sourceget() if sourcematch(","): while source.next in DIGITS: hi += sourceget() else: hi = lo if not sourcematch("}"): subpatternappend((LITERAL, _ord(this))) source.seek(here) continue if lo: min = int(lo) if min >= MAXREPEAT: raise OverflowError("the repetition number is too large") if hi: max = int(hi) if max >= MAXREPEAT: raise OverflowError("the repetition number is too large") if max < min: raise source.error("min repeat greater than max repeat", source.tell() - here) else: raise AssertionError("unsupported quantifier %r" % (char,)) # figure out which item to repeat if subpattern: item = subpattern[-1:] else: item = None if not item or item[0][0] is AT: raise source.error("nothing to repeat", source.tell() - here + len(this)) if item[0][0] in _REPEATCODES: raise source.error("multiple repeat", source.tell() - here + len(this)) if item[0][0] is SUBPATTERN: group, add_flags, del_flags, p = item[0][1] if group is None and not add_flags and not del_flags: item = p if sourcematch("?"): subpattern[-1] = (MIN_REPEAT, (min, max, item)) else: subpattern[-1] = (MAX_REPEAT, (min, max, item)) elif this == ".": subpatternappend((ANY, None)) elif this == "(": start = source.tell() - 1 group = True name = None add_flags = 0 del_flags = 0 if sourcematch("?"): # options char = sourceget() if char is None: raise source.error("unexpected end of pattern") if char == "P": # python extensions if sourcematch("<"): # named group: skip forward to end of name name = source.getuntil(">") if not name.isidentifier(): msg = "bad character in group name %r" % name raise source.error(msg, len(name) + 1) elif sourcematch("="): # named backreference name = source.getuntil(")") if not name.isidentifier(): msg = "bad character in group name %r" % name raise source.error(msg, len(name) + 1) gid = state.groupdict.get(name) if gid is None: msg = "unknown group name %r" % name raise source.error(msg, len(name) + 1) if not state.checkgroup(gid): raise source.error("cannot refer to an open group", len(name) + 1) state.checklookbehindgroup(gid, source) subpatternappend((GROUPREF, gid)) continue else: char = sourceget() if char is None: raise source.error("unexpected end of pattern") raise source.error("unknown extension ?P" + char, len(char) + 2) elif char == ":": # non-capturing group group = None elif char == "#": # comment while True: if source.next is None: raise source.error("missing ), unterminated comment", source.tell() - start) if sourceget() == ")": break continue elif char in "=!<": # lookahead assertions dir = 1 if char == "<": char = sourceget() if char is None: raise source.error("unexpected end of pattern") if char not in "=!": raise source.error("unknown extension ?<" + char, len(char) + 2) dir = -1 # lookbehind lookbehindgroups = state.lookbehindgroups if lookbehindgroups is None: state.lookbehindgroups = state.groups p = _parse_sub(source, state, verbose, nested + 1) if dir < 0: if lookbehindgroups is None: state.lookbehindgroups = None if not sourcematch(")"): raise source.error("missing ), unterminated subpattern", source.tell() - start) if char == "=": subpatternappend((ASSERT, (dir, p))) else: subpatternappend((ASSERT_NOT, (dir, p))) continue elif char == "(": # conditional backreference group condname = source.getuntil(")") if condname.isidentifier(): condgroup = state.groupdict.get(condname) if condgroup is None: msg = "unknown group name %r" % condname raise source.error(msg, len(condname) + 1) else: try: condgroup = int(condname) if condgroup < 0: raise ValueError except ValueError: msg = "bad character in group name %r" % condname raise source.error(msg, len(condname) + 1) from None if not condgroup: raise source.error("bad group number", len(condname) + 1) if condgroup >= MAXGROUPS: msg = "invalid group reference %d" % condgroup raise source.error(msg, len(condname) + 1) state.checklookbehindgroup(condgroup, source) item_yes = _parse(source, state, verbose, nested + 1) if source.match("|"): item_no = _parse(source, state, verbose, nested + 1) if source.next == "|": raise source.error("conditional backref with more than two branches") else: item_no = None if not source.match(")"): raise source.error("missing ), unterminated subpattern", source.tell() - start) subpatternappend((GROUPREF_EXISTS, (condgroup, item_yes, item_no))) continue elif char in FLAGS or char == "-": # flags flags = _parse_flags(source, state, char) if flags is None: # global flags if not first or subpattern: import warnings warnings.warn( 'Flags not at the start of the expression %r%s' % ( source.string[:20], # truncate long regexes ' (truncated)' if len(source.string) > 20 else '', ), DeprecationWarning, stacklevel=nested + 6 ) if (state.flags & SRE_FLAG_VERBOSE) and not verbose: raise Verbose continue add_flags, del_flags = flags group = None else: raise source.error("unknown extension ?" + char, len(char) + 1) # parse group contents if group is not None: try: group = state.opengroup(name) except error as err: raise source.error(err.msg, len(name) + 1) from None sub_verbose = ((verbose or (add_flags & SRE_FLAG_VERBOSE)) and not (del_flags & SRE_FLAG_VERBOSE)) p = _parse_sub(source, state, sub_verbose, nested + 1) if not source.match(")"): raise source.error("missing ), unterminated subpattern", source.tell() - start) if group is not None: state.closegroup(group, p) subpatternappend((SUBPATTERN, (group, add_flags, del_flags, p))) elif this == "^": subpatternappend((AT, AT_BEGINNING)) elif this == "$": subpatternappend((AT, AT_END)) else: raise AssertionError("unsupported special character %r" % (char,)) # unpack non-capturing groups for i in range(len(subpattern))[::-1]: op, av = subpattern[i] if op is SUBPATTERN: group, add_flags, del_flags, p = av if group is None and not add_flags and not del_flags: subpattern[i: i+1] = p return subpattern def _parse_flags(source, state, char): sourceget = source.get add_flags = 0 del_flags = 0 if char != "-": while True: flag = FLAGS[char] if source.istext: if char == 'L': msg = "bad inline flags: cannot use 'L' flag with a str pattern" raise source.error(msg) else: if char == 'u': msg = "bad inline flags: cannot use 'u' flag with a bytes pattern" raise source.error(msg) add_flags |= flag if (flag & TYPE_FLAGS) and (add_flags & TYPE_FLAGS) != flag: msg = "bad inline flags: flags 'a', 'u' and 'L' are incompatible" raise source.error(msg) char = sourceget() if char is None: raise source.error("missing -, : or )") if char in ")-:": break if char not in FLAGS: msg = "unknown flag" if char.isalpha() else "missing -, : or )" raise source.error(msg, len(char)) if char == ")": state.flags |= add_flags return None if add_flags & GLOBAL_FLAGS: raise source.error("bad inline flags: cannot turn on global flag", 1) if char == "-": char = sourceget() if char is None: raise source.error("missing flag") if char not in FLAGS: msg = "unknown flag" if char.isalpha() else "missing flag" raise source.error(msg, len(char)) while True: flag = FLAGS[char] if flag & TYPE_FLAGS: msg = "bad inline flags: cannot turn off flags 'a', 'u' and 'L'" raise source.error(msg) del_flags |= flag char = sourceget() if char is None: raise source.error("missing :") if char == ":": break if char not in FLAGS: msg = "unknown flag" if char.isalpha() else "missing :" raise source.error(msg, len(char)) assert char == ":" if del_flags & GLOBAL_FLAGS: raise source.error("bad inline flags: cannot turn off global flag", 1) if add_flags & del_flags: raise source.error("bad inline flags: flag turned on and off", 1) return add_flags, del_flags def fix_flags(src, flags): # Check and fix flags according to the type of pattern (str or bytes) if isinstance(src, str): if flags & SRE_FLAG_LOCALE: raise ValueError("cannot use LOCALE flag with a str pattern") if not flags & SRE_FLAG_ASCII: flags |= SRE_FLAG_UNICODE elif flags & SRE_FLAG_UNICODE: raise ValueError("ASCII and UNICODE flags are incompatible") else: if flags & SRE_FLAG_UNICODE: raise ValueError("cannot use UNICODE flag with a bytes pattern") if flags & SRE_FLAG_LOCALE and flags & SRE_FLAG_ASCII: raise ValueError("ASCII and LOCALE flags are incompatible") return flags def parse(str, flags=0, pattern=None): # parse 're' pattern into list of (opcode, argument) tuples source = Tokenizer(str) if pattern is None: pattern = Pattern() pattern.flags = flags pattern.str = str try: p = _parse_sub(source, pattern, flags & SRE_FLAG_VERBOSE, 0) except Verbose: # the VERBOSE flag was switched on inside the pattern. to be # on the safe side, we'll parse the whole thing again... pattern = Pattern() pattern.flags = flags | SRE_FLAG_VERBOSE pattern.str = str source.seek(0) p = _parse_sub(source, pattern, True, 0) p.pattern.flags = fix_flags(str, p.pattern.flags) if source.next is not None: assert source.next == ")" raise source.error("unbalanced parenthesis") if flags & SRE_FLAG_DEBUG: p.dump() return p def parse_template(source, pattern): # parse 're' replacement string into list of literals and # group references s = Tokenizer(source) sget = s.get groups = [] literals = [] literal = [] lappend = literal.append def addgroup(index, pos): if index > pattern.groups: raise s.error("invalid group reference %d" % index, pos) if literal: literals.append(''.join(literal)) del literal[:] groups.append((len(literals), index)) literals.append(None) groupindex = pattern.groupindex while True: this = sget() if this is None: break # end of replacement string if this[0] == "\\": # group c = this[1] if c == "g": name = "" if not s.match("<"): raise s.error("missing <") name = s.getuntil(">") if name.isidentifier(): try: index = groupindex[name] except KeyError: raise IndexError("unknown group name %r" % name) else: try: index = int(name) if index < 0: raise ValueError except ValueError: raise s.error("bad character in group name %r" % name, len(name) + 1) from None if index >= MAXGROUPS: raise s.error("invalid group reference %d" % index, len(name) + 1) addgroup(index, len(name) + 1) elif c == "0": if s.next in OCTDIGITS: this += sget() if s.next in OCTDIGITS: this += sget() lappend(chr(int(this[1:], 8) & 0xff)) elif c in DIGITS: isoctal = False if s.next in DIGITS: this += sget() if (c in OCTDIGITS and this[2] in OCTDIGITS and s.next in OCTDIGITS): this += sget() isoctal = True c = int(this[1:], 8) if c > 0o377: raise s.error('octal escape value %s outside of ' 'range 0-0o377' % this, len(this)) lappend(chr(c)) if not isoctal: addgroup(int(this[1:]), len(this) - 1) else: try: this = chr(ESCAPES[this][1]) except KeyError: if c in ASCIILETTERS: raise s.error('bad escape %s' % this, len(this)) lappend(this) else: lappend(this) if literal: literals.append(''.join(literal)) if not isinstance(source, str): # The tokenizer implicitly decodes bytes objects as latin-1, we must # therefore re-encode the final representation. literals = [None if s is None else s.encode('latin-1') for s in literals] return groups, literals def expand_template(template, match): g = match.group empty = match.string[:0] groups, literals = template literals = literals[:] try: for index, group in groups: literals[index] = g(group) or empty except IndexError: raise error("invalid group reference %d" % index) return empty.join(literals)
Save