Fix for Issue #42

This commit is contained in:
Miroslav Stampar
2012-06-28 13:55:30 +02:00
parent f495cfa139
commit 2a72fcce2b
3 changed files with 5 additions and 5 deletions

View File

@@ -63,7 +63,7 @@ CHAR_INFERENCE_MARK = "%c"
# character class matching a single character outside the ranges \x00-\x1f and \x7e-\xff
# NOTE(review): \x7e is "~", so tilde is excluded as well - confirm \x7f (DEL) was not the intended lower bound
PRINTABLE_CHAR_REGEX = r"[^\x00-\x1f\x7e-\xff]"
# regular expression used for extracting results from google search
# earlier pattern: query parameter fixed to "q", URL (group 1) terminated by the literal "&sa=U&amp";
# superseded by the assignment right below, which rebinds the same name
GOOGLE_REGEX = r"url\?q=(http[^>]+)&sa=U&amp"
# current pattern: accepts any \w+ query parameter name and ends the URL (group 1) at "&sa=U" or "&rct=j"
# NOTE(review): the terminator alternation is a second capturing group, so re.findall() yields
# (url, marker) tuples instead of plain strings - confirm callers read group 1 explicitly
GOOGLE_REGEX = r"url\?\w+=(http[^>]+)&(sa=U|rct=j)"
# regular expression used for extracting content from "textual" tags
# (?si) = dot matches newline + case-insensitive; matches an opening tag from the list (not followed
# by another word character), skips lazily to the closing ">", then captures the text up to the
# next "<" in the named group "result"
TEXT_TAG_REGEX = r"(?si)<(abbr|acronym|b|blockquote|br|center|cite|code|dt|em|font|h\d|i|li|p|pre|q|strong|sub|sup|td|th|title|tt|u)(?!\w).*?>(?P<result>[^<]+)"