Update for Issue #565 (more work TBD - DuckDuckGo has some kind of IP blocking mechanism)

This commit is contained in:
Miroslav Stampar
2013-11-25 20:57:07 +01:00
parent 24e67289c8
commit 7054586e8a
2 changed files with 55 additions and 1 deletions

View File

@@ -53,9 +53,12 @@ PERMISSION_DENIED_REGEX = r"(command|permission|access)\s*(was|is)?\s*denied"
# Regular expression used for recognition of generic maximum connection messages
# (matches "max", then one or more arbitrary characters, then "connections";
# no inline flags are set, so case sensitivity depends on the flags the caller passes)
MAX_CONNECTIONS_REGEX = r"max.+connections"
# Regular expression used for extracting results from google search
# Regular expression used for extracting results from Google search
# Group 1 captures the target starting with "http" that follows "url?<param>="; the
# negative lookahead rejects targets that contain webcache.googleusercontent.com before
# the next ">". The target must be followed by "&sa=U" or "&rct=j" (captured as group 2).
GOOGLE_REGEX = r"url\?\w+=((?![^>]+webcache\.googleusercontent\.com)http[^>]+)&(sa=U|rct=j)"
# Regular expression used for extracting results from DuckDuckGo search
# Group 1 captures everything between the literal "u":" prefix and the next double quote
# (presumably the result URL field of the response -- TODO confirm against the caller)
DUCKDUCKGO_REGEX = r'"u":"([^"]+)'
# Regular expression used for extracting content from "textual" tags
# The inline flags (?si) make "." match newlines and the whole pattern case-insensitive.
# (?!\w) stops a short tag name from matching the start of a longer one (e.g. "b" in
# "<body"); the rest of the opening tag is skipped lazily up to ">", and the named group
# "result" captures the text that follows, up to (not including) the next "<".
TEXT_TAG_REGEX = r"(?si)<(abbr|acronym|b|blockquote|br|center|cite|code|dt|em|font|h\d|i|li|p|pre|q|strong|sub|sup|td|th|title|tt|u)(?!\w).*?>(?P<result>[^<]+)"