bitbake: wget.py: parse only <a> tags

This is done for two reasons: 1) The important one: we hit the following bug when doing upstream version checks on some webpages: https://bugs.launchpad.net/beautifulsoup/+bug/1471755 2) Also, the documentation for beautifulsoup states that memory usage and speed are improved that way. (Bitbake rev: 7546d4aeb3ba8fda9832081b84d93138dc5e58d6) Signed-off-by: Alexander Kanavin <alexander.kanavin@linux.intel.com> Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
2015-12-04 13:00:20 +02:00 · 2015-12-04 13:00:20 +02:00 · 9d19dd9bd7
parent 71ede7b689
commit 9d19dd9bd7
1 changed file with 3 additions and 2 deletions
--- a/bitbake/lib/bb/fetch2/wget.py
+++ b/bitbake/lib/bb/fetch2/wget.py
@@ -38,6 +38,7 @@ from   bb.fetch2 import FetchError
 from   bb.fetch2 import logger
 from   bb.fetch2 import runfetchcmd
 from   bs4 import BeautifulSoup
+from   bs4 import SoupStrainer

 class Wget(FetchMethod):
    """Class to fetch urls via 'wget'"""
@@ -367,7 +368,7 @@ class Wget(FetchMethod):
        version = ['', '', '']

        bb.debug(3, "VersionURL: %s" % (url))
-        soup = BeautifulSoup(self._fetch_index(url, ud, d))
+        soup = BeautifulSoup(self._fetch_index(url, ud, d), "html.parser", parse_only=SoupStrainer("a"))
        if not soup:
            bb.debug(3, "*** %s NO SOUP" % (url))
            return ""
@@ -417,7 +418,7 @@ class Wget(FetchMethod):
                ud.path.split(dirver)[0], ud.user, ud.pswd, {}])
        bb.debug(3, "DirURL: %s, %s" % (dirs_uri, package))

-        soup = BeautifulSoup(self._fetch_index(dirs_uri, ud, d))
+        soup = BeautifulSoup(self._fetch_index(dirs_uri, ud, d), "html.parser", parse_only=SoupStrainer("a"))
        if not soup:
            return version[1]