Trivial code style updates

This commit is contained in:
Miroslav Stampar
2019-04-19 13:54:48 +02:00
parent 10fe87fb4e
commit e7469ab570
14 changed files with 73 additions and 54 deletions

View File

@@ -21,9 +21,9 @@ __all__ = ["SGMLParser", "SGMLParseError"]
interesting = re.compile('[&<]')
incomplete = re.compile('&([a-zA-Z][a-zA-Z0-9]*|#[0-9]*)?|'
'<([a-zA-Z][^<>]*|'
'/([a-zA-Z][^<>]*)?|'
'![^<>]*)?')
'<([a-zA-Z][^<>]*|'
'/([a-zA-Z][^<>]*)?|'
'![^<>]*)?')
entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]')
charref = re.compile('&#([0-9]+)[^0-9]')
@@ -58,8 +58,8 @@ class SGMLParseError(RuntimeError):
class SGMLParser(_markupbase.ParserBase):
# Definition of entities -- derived classes may override
entity_or_charref = re.compile('&(?:'
'([a-zA-Z][-.a-zA-Z0-9]*)|#([0-9]+)'
')(;?)')
'([a-zA-Z][-.a-zA-Z0-9]*)|#([0-9]+)'
')(;?)')
def __init__(self, verbose=0):
"""Initialize and reset this instance."""
@@ -121,32 +121,37 @@ class SGMLParser(_markupbase.ParserBase):
i = n
break
match = interesting.search(rawdata, i)
if match: j = match.start()
else: j = n
if match:
j = match.start()
else:
j = n
if i < j:
self.handle_data(rawdata[i:j])
i = j
if i == n: break
if i == n:
break
if rawdata[i] == '<':
if starttagopen.match(rawdata, i):
if self.literal:
self.handle_data(rawdata[i])
i = i+1
i = i + 1
continue
k = self.parse_starttag(i)
if k < 0: break
if k < 0:
break
i = k
continue
if rawdata.startswith("</", i):
k = self.parse_endtag(i)
if k < 0: break
if k < 0:
break
i = k
self.literal = 0
continue
if self.literal:
if n > (i + 1):
self.handle_data("<")
i = i+1
i = i + 1
else:
# incomplete
break
@@ -157,12 +162,14 @@ class SGMLParser(_markupbase.ParserBase):
# This should be removed,
# and comments handled only in parse_declaration.
k = self.parse_comment(i)
if k < 0: break
if k < 0:
break
i = k
continue
if rawdata.startswith("<?", i):
k = self.parse_pi(i)
if k < 0: break
if k < 0:
break
i = i+k
continue
if rawdata.startswith("<!", i):
@@ -170,27 +177,30 @@ class SGMLParser(_markupbase.ParserBase):
# deployed," this should only be the document type
# declaration ("<!DOCTYPE html...>").
k = self.parse_declaration(i)
if k < 0: break
if k < 0:
break
i = k
continue
elif rawdata[i] == '&':
if self.literal:
self.handle_data(rawdata[i])
i = i+1
i = i + 1
continue
match = charref.match(rawdata, i)
if match:
name = match.group(1)
self.handle_charref(name)
i = match.end(0)
if rawdata[i-1] != ';': i = i-1
if rawdata[i-1] != ';':
i = i-1
continue
match = entityref.match(rawdata, i)
if match:
name = match.group(1)
self.handle_entityref(name)
i = match.end(0)
if rawdata[i-1] != ';': i = i-1
if rawdata[i-1] != ';':
i = i-1
continue
else:
self.error('neither < nor & ??')
@@ -199,11 +209,11 @@ class SGMLParser(_markupbase.ParserBase):
match = incomplete.match(rawdata, i)
if not match:
self.handle_data(rawdata[i])
i = i+1
i = i + 1
continue
j = match.end(0)
if j == n:
break # Really incomplete
break # Really incomplete
self.handle_data(rawdata[i:j])
i = j
# end while
@@ -256,32 +266,33 @@ class SGMLParser(_markupbase.ParserBase):
# As a shortcut way to exit, this isn't so bad, but shouldn't
# be used to locate the actual end of the start tag since the
# < or > characters may be embedded in an attribute value.
match = endbracket.search(rawdata, i+1)
match = endbracket.search(rawdata, i + 1)
if not match:
return -1
j = match.start(0)
# Now parse the data between i+1 and j into a tag and attrs
# Now parse the data between i + 1 and j into a tag and attrs
attrs = []
if rawdata[i:i+2] == '<>':
# SGML shorthand: <> == <last open tag seen>
k = j
tag = self.lasttag
else:
match = tagfind.match(rawdata, i+1)
match = tagfind.match(rawdata, i + 1)
if not match:
self.error('unexpected call to parse_starttag')
k = match.end(0)
tag = rawdata[i+1:k].lower()
tag = rawdata[i + 1:k].lower()
self.lasttag = tag
while k < j:
match = attrfind.match(rawdata, k)
if not match: break
if not match:
break
attrname, rest, attrvalue = match.group(1, 2, 3)
if not rest:
attrvalue = attrname
else:
if (attrvalue[:1] == "'" == attrvalue[-1:] or
attrvalue[:1] == '"' == attrvalue[-1:]):
attrvalue[:1] == '"' == attrvalue[-1:]):
# strip quotes
attrvalue = attrvalue[1:-1]
attrvalue = self.entity_or_charref.sub(
@@ -289,7 +300,7 @@ class SGMLParser(_markupbase.ParserBase):
attrs.append((attrname.lower(), attrvalue))
k = match.end(0)
if rawdata[j] == '>':
j = j+1
j = j + 1
self.__starttag_text = rawdata[start_pos:j]
self.finish_starttag(tag, attrs)
return j
@@ -308,13 +319,13 @@ class SGMLParser(_markupbase.ParserBase):
# Internal -- parse endtag
def parse_endtag(self, i):
    """Parse the end tag that begins at offset ``i`` of ``self.rawdata``.

    The tag name is the text between the two-character opener at ``i``
    and the first ``endbracket`` match after it, stripped of surrounding
    whitespace and lower-cased; it is handed to ``finish_endtag``.

    Returns the offset at which scanning should resume, or -1 when no
    terminating bracket is found in the data seen so far.
    """
    rawdata = self.rawdata
    match = endbracket.search(rawdata, i + 1)
    if not match:
        # Tag is not terminated yet; the caller checks for k < 0.
        return -1
    j = match.start(0)
    tag = rawdata[i + 2:j].strip().lower()
    # Only a '>' is consumed as part of the tag; any other character that
    # endbracket matched is left in place for the caller to process.
    if rawdata[j] == '>':
        j = j + 1
    self.finish_endtag(tag)
    return j
@@ -361,7 +372,8 @@ class SGMLParser(_markupbase.ParserBase):
return
found = len(self.stack)
for i in range(found):
if self.stack[i] == tag: found = i
if self.stack[i] == tag:
found = i
while len(self.stack) > found:
tag = self.stack[-1]
try:
@@ -411,7 +423,7 @@ class SGMLParser(_markupbase.ParserBase):
# Definition of entities -- derived classes may override
entitydefs = \
{'lt': '<', 'gt': '>', 'amp': '&', 'quot': '"', 'apos': '\''}
{'lt': '<', 'gt': '>', 'amp': '&', 'quot': '"', 'apos': '\''}
def convert_entityref(self, name):
"""Convert entity references.
@@ -450,10 +462,17 @@ class SGMLParser(_markupbase.ParserBase):
pass
# To be overridden -- handlers for unknown objects
def unknown_starttag(self, tag, attrs):
    """Hook for an unknown start tag; does nothing unless overridden."""
    pass


def unknown_endtag(self, tag):
    """Hook for an unknown end tag; does nothing unless overridden."""
    pass


def unknown_charref(self, ref):
    """Hook for an unknown character reference; does nothing unless overridden."""
    pass


def unknown_entityref(self, ref):
    """Hook for an unknown entity reference; does nothing unless overridden."""
    pass
class TestSGMLParser(SGMLParser):
@@ -511,7 +530,7 @@ class TestSGMLParser(SGMLParser):
self.flush()
def test(args = None):
def test(args=None):
import sys
if args is None:
@@ -548,4 +567,4 @@ def test(args = None):
if __name__ == '__main__':
test()
test()