From e938331d8e0a05e89d3eec4f58b517731719244c Mon Sep 17 00:00:00 2001 From: Miroslav Stampar Date: Sat, 15 May 2010 22:02:28 +0000 Subject: [PATCH] better regex used avoiding garbage google images --- lib/utils/google.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/utils/google.py b/lib/utils/google.py index 615b4a1f1..db26cdaa6 100644 --- a/lib/utils/google.py +++ b/lib/utils/google.py @@ -54,7 +54,7 @@ class Google: matches = [] - regExpr = "class=\042?r\042?\076\074a href=\042(http[s]*://.+?)\042\sclass=\042?l\042?" + regExpr = "li class=\042?g\042?\076.+?a href=\042(http[s]*://.+?)\042\sclass=\042?l\042?" matches = re.findall(regExpr, page, re.I | re.M) return matches