This commit is contained in:
Miroslav Stampar
2015-08-31 10:24:05 +02:00
parent d2a9c7584f
commit d70215ad6c
2 changed files with 64 additions and 42 deletions

View File

@@ -75,6 +75,12 @@ GOOGLE_REGEX = r"url\?\w+=((?![^>]+webcache\.googleusercontent\.com)http[^>]+)&(
# Regular expression used for extracting results from DuckDuckGo search
DUCKDUCKGO_REGEX = r'"u":"([^"]+)'
# Regular expression used for extracting results from Disconnect Search
DISCONNECT_SEARCH_REGEX = r'<p class="url wrapword">([^<]+)</p>'
# Dummy user agent for search (if default one returns different results)
DUMMY_SEARCH_USER_AGENT = "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:40.0) Gecko/20100101 Firefox/40.0"
# Regular expression used for extracting content from "textual" tags
TEXT_TAG_REGEX = r"(?si)<(abbr|acronym|b|blockquote|br|center|cite|code|dt|em|font|h\d|i|li|p|pre|q|strong|sub|sup|td|th|title|tt|u)(?!\w).*?>(?P<result>[^<]+)"