bitbake: wget.py: parse only <a> tags
For two reasons: 1) The important one: we hit the following bug when doing upstream version checks on some webpages: https://bugs.launchpad.net/beautifulsoup/+bug/1471755 2) Also, documentation for beautifulsoup states that memory usage and speed is improved that way. (Bitbake rev: 7546d4aeb3ba8fda9832081b84d93138dc5e58d6) Signed-off-by: Alexander Kanavin <alexander.kanavin@linux.intel.com> Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
This commit is contained in:
parent
71ede7b689
commit
9d19dd9bd7
|
@ -38,6 +38,7 @@ from bb.fetch2 import FetchError
|
|||
from bb.fetch2 import logger
|
||||
from bb.fetch2 import runfetchcmd
|
||||
from bs4 import BeautifulSoup
|
||||
from bs4 import SoupStrainer
|
||||
|
||||
class Wget(FetchMethod):
|
||||
"""Class to fetch urls via 'wget'"""
|
||||
|
@ -367,7 +368,7 @@ class Wget(FetchMethod):
|
|||
version = ['', '', '']
|
||||
|
||||
bb.debug(3, "VersionURL: %s" % (url))
|
||||
soup = BeautifulSoup(self._fetch_index(url, ud, d))
|
||||
soup = BeautifulSoup(self._fetch_index(url, ud, d), "html.parser", parse_only=SoupStrainer("a"))
|
||||
if not soup:
|
||||
bb.debug(3, "*** %s NO SOUP" % (url))
|
||||
return ""
|
||||
|
@ -417,7 +418,7 @@ class Wget(FetchMethod):
|
|||
ud.path.split(dirver)[0], ud.user, ud.pswd, {}])
|
||||
bb.debug(3, "DirURL: %s, %s" % (dirs_uri, package))
|
||||
|
||||
soup = BeautifulSoup(self._fetch_index(dirs_uri, ud, d))
|
||||
soup = BeautifulSoup(self._fetch_index(dirs_uri, ud, d), "html.parser", parse_only=SoupStrainer("a"))
|
||||
if not soup:
|
||||
return version[1]
|
||||
|
||||
|
|
Loading…
Reference in New Issue