bitbake: wget.py: parse only <a> tags

For two reasons:
1) The important one: we hit the following bug when doing upstream version checks
on some webpages:
https://bugs.launchpad.net/beautifulsoup/+bug/1471755

2) Also, documentation for beautifulsoup states that memory usage and
speed is improved that way.

(Bitbake rev: 7546d4aeb3ba8fda9832081b84d93138dc5e58d6)

Signed-off-by: Alexander Kanavin <alexander.kanavin@linux.intel.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
This commit is contained in:
Alexander Kanavin 2015-12-04 13:00:20 +02:00 committed by Richard Purdie
parent 71ede7b689
commit 9d19dd9bd7
1 changed files with 3 additions and 2 deletions

View File

@ -38,6 +38,7 @@ from bb.fetch2 import FetchError
from bb.fetch2 import logger
from bb.fetch2 import runfetchcmd
from bs4 import BeautifulSoup
from bs4 import SoupStrainer
class Wget(FetchMethod):
"""Class to fetch urls via 'wget'"""
@ -367,7 +368,7 @@ class Wget(FetchMethod):
version = ['', '', '']
bb.debug(3, "VersionURL: %s" % (url))
soup = BeautifulSoup(self._fetch_index(url, ud, d))
soup = BeautifulSoup(self._fetch_index(url, ud, d), "html.parser", parse_only=SoupStrainer("a"))
if not soup:
bb.debug(3, "*** %s NO SOUP" % (url))
return ""
@ -417,7 +418,7 @@ class Wget(FetchMethod):
ud.path.split(dirver)[0], ud.user, ud.pswd, {}])
bb.debug(3, "DirURL: %s, %s" % (dirs_uri, package))
soup = BeautifulSoup(self._fetch_index(dirs_uri, ud, d))
soup = BeautifulSoup(self._fetch_index(dirs_uri, ud, d), "html.parser", parse_only=SoupStrainer("a"))
if not soup:
return version[1]