Some more DREI (Python 3 compatibility) stuff

This commit is contained in:
Miroslav Stampar
2019-04-19 11:24:34 +02:00
parent da15701a55
commit bb7bd51d94
15 changed files with 94 additions and 71 deletions

View File

@@ -32,6 +32,7 @@ from lib.core.threads import getCurrentThreadData
from lib.core.threads import runThreads
from lib.parse.sitemap import parseSitemap
from lib.request.connect import Connect as Request
from thirdparty import six
from thirdparty.beautifulsoup.beautifulsoup import BeautifulSoup
from thirdparty.six.moves import http_client as _http_client
from thirdparty.six.moves import urllib as _urllib
@@ -79,7 +80,7 @@ def crawl(target):
if not kb.threadContinue:
break
if isinstance(content, unicode):
if isinstance(content, six.text_type):
try:
match = re.search(r"(?si)<html[^>]*>(.+)</html>", content)
if match: