mirror of
https://github.com/sqlmapproject/sqlmap.git
synced 2025-12-08 05:31:32 +00:00
Some more DREI stuff
This commit is contained in:
61
thirdparty/beautifulsoup/beautifulsoup.py
vendored
61
thirdparty/beautifulsoup/beautifulsoup.py
vendored
@@ -91,6 +91,11 @@ import sys
|
||||
|
||||
if sys.version_info >= (3, 0):
|
||||
xrange = range
|
||||
text_type = str
|
||||
binary_type = bytes
|
||||
else:
|
||||
text_type = unicode
|
||||
binary_type = str
|
||||
|
||||
try:
|
||||
from htmlentitydefs import name2codepoint
|
||||
@@ -434,19 +439,13 @@ class PageElement(object):
|
||||
def toEncoding(self, s, encoding=None):
|
||||
"""Encodes an object to a string in some encoding, or to Unicode.
|
||||
."""
|
||||
if isinstance(s, unicode):
|
||||
if isinstance(s, text_type):
|
||||
if encoding:
|
||||
s = s.encode(encoding)
|
||||
elif isinstance(s, str):
|
||||
if encoding:
|
||||
s = s.encode(encoding)
|
||||
else:
|
||||
s = unicode(s)
|
||||
elif isinstance(s, binary_type):
|
||||
s = s.encode(encoding or "utf8")
|
||||
else:
|
||||
if encoding:
|
||||
s = self.toEncoding(str(s), encoding)
|
||||
else:
|
||||
s = unicode(s)
|
||||
s = self.toEncoding(str(s), encoding or "utf8")
|
||||
return s
|
||||
|
||||
BARE_AMPERSAND_OR_BRACKET = re.compile("([<>]|"
|
||||
@@ -459,7 +458,7 @@ class PageElement(object):
|
||||
return "&" + self.XML_SPECIAL_CHARS_TO_ENTITIES[x.group(0)[0]] + ";"
|
||||
|
||||
|
||||
class NavigableString(unicode, PageElement):
|
||||
class NavigableString(text_type, PageElement):
|
||||
|
||||
def __new__(cls, value):
|
||||
"""Create a new NavigableString.
|
||||
@@ -469,9 +468,9 @@ class NavigableString(unicode, PageElement):
|
||||
passed in to the superclass's __new__ or the superclass won't know
|
||||
how to handle non-ASCII characters.
|
||||
"""
|
||||
if isinstance(value, unicode):
|
||||
return unicode.__new__(cls, value)
|
||||
return unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING)
|
||||
if isinstance(value, text_type):
|
||||
return text_type.__new__(cls, value)
|
||||
return text_type.__new__(cls, value, DEFAULT_OUTPUT_ENCODING)
|
||||
|
||||
def __getnewargs__(self):
|
||||
return (NavigableString.__str__(self),)
|
||||
@@ -1006,7 +1005,7 @@ class SoupStrainer:
|
||||
if isinstance(markup, Tag):
|
||||
markup = markup.name
|
||||
if markup and not isinstance(markup, basestring):
|
||||
markup = unicode(markup)
|
||||
markup = text_type(markup)
|
||||
#Now we know that chunk is either a string, or None.
|
||||
if hasattr(matchAgainst, 'match'):
|
||||
# It's a regexp object.
|
||||
@@ -1016,8 +1015,8 @@ class SoupStrainer:
|
||||
elif hasattr(matchAgainst, 'items'):
|
||||
result = markup.has_key(matchAgainst)
|
||||
elif matchAgainst and isinstance(markup, basestring):
|
||||
if isinstance(markup, unicode):
|
||||
matchAgainst = unicode(matchAgainst)
|
||||
if isinstance(markup, text_type):
|
||||
matchAgainst = text_type(matchAgainst)
|
||||
else:
|
||||
matchAgainst = str(matchAgainst)
|
||||
|
||||
@@ -1181,7 +1180,7 @@ class BeautifulStoneSoup(Tag, sgmllib.SGMLParser):
|
||||
def _feed(self, inDocumentEncoding=None, isHTML=False):
|
||||
# Convert the document to Unicode.
|
||||
markup = self.markup
|
||||
if isinstance(markup, unicode):
|
||||
if isinstance(markup, text_type):
|
||||
if not hasattr(self, 'originalEncoding'):
|
||||
self.originalEncoding = None
|
||||
else:
|
||||
@@ -1792,9 +1791,9 @@ class UnicodeDammit:
|
||||
self._detectEncoding(markup, isHTML)
|
||||
self.smartQuotesTo = smartQuotesTo
|
||||
self.triedEncodings = []
|
||||
if markup == '' or isinstance(markup, unicode):
|
||||
if markup == '' or isinstance(markup, text_type):
|
||||
self.originalEncoding = None
|
||||
self.unicode = unicode(markup)
|
||||
self.unicode = text_type(markup)
|
||||
return
|
||||
|
||||
u = None
|
||||
@@ -1807,7 +1806,7 @@ class UnicodeDammit:
|
||||
if u: break
|
||||
|
||||
# If no luck and we have auto-detection library, try that:
|
||||
if not u and chardet and not isinstance(self.markup, unicode):
|
||||
if not u and chardet and not isinstance(self.markup, text_type):
|
||||
u = self._convertFrom(chardet.detect(self.markup)['encoding'])
|
||||
|
||||
# As a last resort, try utf-8 and windows-1252:
|
||||
@@ -1880,7 +1879,7 @@ class UnicodeDammit:
|
||||
elif data[:4] == '\xff\xfe\x00\x00':
|
||||
encoding = 'utf-32le'
|
||||
data = data[4:]
|
||||
newdata = unicode(data, encoding)
|
||||
newdata = text_type(data, encoding)
|
||||
return newdata
|
||||
|
||||
def _detectEncoding(self, xml_data, isHTML=False):
|
||||
@@ -1893,41 +1892,41 @@ class UnicodeDammit:
|
||||
elif xml_data[:4] == '\x00\x3c\x00\x3f':
|
||||
# UTF-16BE
|
||||
sniffed_xml_encoding = 'utf-16be'
|
||||
xml_data = unicode(xml_data, 'utf-16be').encode('utf-8')
|
||||
xml_data = text_type(xml_data, 'utf-16be').encode('utf-8')
|
||||
elif (len(xml_data) >= 4) and (xml_data[:2] == '\xfe\xff') \
|
||||
and (xml_data[2:4] != '\x00\x00'):
|
||||
# UTF-16BE with BOM
|
||||
sniffed_xml_encoding = 'utf-16be'
|
||||
xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8')
|
||||
xml_data = text_type(xml_data[2:], 'utf-16be').encode('utf-8')
|
||||
elif xml_data[:4] == '\x3c\x00\x3f\x00':
|
||||
# UTF-16LE
|
||||
sniffed_xml_encoding = 'utf-16le'
|
||||
xml_data = unicode(xml_data, 'utf-16le').encode('utf-8')
|
||||
xml_data = text_type(xml_data, 'utf-16le').encode('utf-8')
|
||||
elif (len(xml_data) >= 4) and (xml_data[:2] == '\xff\xfe') and \
|
||||
(xml_data[2:4] != '\x00\x00'):
|
||||
# UTF-16LE with BOM
|
||||
sniffed_xml_encoding = 'utf-16le'
|
||||
xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8')
|
||||
xml_data = text_type(xml_data[2:], 'utf-16le').encode('utf-8')
|
||||
elif xml_data[:4] == '\x00\x00\x00\x3c':
|
||||
# UTF-32BE
|
||||
sniffed_xml_encoding = 'utf-32be'
|
||||
xml_data = unicode(xml_data, 'utf-32be').encode('utf-8')
|
||||
xml_data = text_type(xml_data, 'utf-32be').encode('utf-8')
|
||||
elif xml_data[:4] == '\x3c\x00\x00\x00':
|
||||
# UTF-32LE
|
||||
sniffed_xml_encoding = 'utf-32le'
|
||||
xml_data = unicode(xml_data, 'utf-32le').encode('utf-8')
|
||||
xml_data = text_type(xml_data, 'utf-32le').encode('utf-8')
|
||||
elif xml_data[:4] == '\x00\x00\xfe\xff':
|
||||
# UTF-32BE with BOM
|
||||
sniffed_xml_encoding = 'utf-32be'
|
||||
xml_data = unicode(xml_data[4:], 'utf-32be').encode('utf-8')
|
||||
xml_data = text_type(xml_data[4:], 'utf-32be').encode('utf-8')
|
||||
elif xml_data[:4] == '\xff\xfe\x00\x00':
|
||||
# UTF-32LE with BOM
|
||||
sniffed_xml_encoding = 'utf-32le'
|
||||
xml_data = unicode(xml_data[4:], 'utf-32le').encode('utf-8')
|
||||
xml_data = text_type(xml_data[4:], 'utf-32le').encode('utf-8')
|
||||
elif xml_data[:3] == '\xef\xbb\xbf':
|
||||
# UTF-8 with BOM
|
||||
sniffed_xml_encoding = 'utf-8'
|
||||
xml_data = unicode(xml_data[3:], 'utf-8').encode('utf-8')
|
||||
xml_data = text_type(xml_data[3:], 'utf-8').encode('utf-8')
|
||||
else:
|
||||
sniffed_xml_encoding = 'ascii'
|
||||
pass
|
||||
|
||||
Reference in New Issue
Block a user