Message ID | 82da88bf02bf928d8807bc93bfb5fcdeece1f558.1721407122.git.jan.kiszka@siemens.com |
---|---|
State | Accepted, archived |
Headers | show |
Series | Introduce container fetcher and pre-loader | expand |
On Fri, 2024-07-19 at 18:38 +0200, Jan Kiszka wrote: > From: Jan Kiszka <jan.kiszka@siemens.com> > > This bitbake fetcher allows to pull container images from registries, > store them in the download cache and transfer them into the workdir of > recipes requesting the image. The format of the URL is > > docker://[<host>/]<image>;digest=sha256:...[;tag=<tag>] > > Fetching without digest is supported but will cause a warning, just like > downloading via wget without a checksum. If tag is left out, "latest" is > used. > > In case a multi-arch image is specified, the fetcher will only pull for > the package architecture of the requesting recipe. The image is stored > compressed in docker-archive format and, wherever possible, hard-linked > from DL_DIR to WORKDIR. Future versions may also introduce full > unpacking of the fetched container layers in workdir if use cases come up. > > Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com> > --- > meta/classes/dpkg-base.bbclass | 6 +++ > meta/lib/container_fetcher.py | 86 ++++++++++++++++++++++++++++++++++ > 2 files changed, 92 insertions(+) > create mode 100644 meta/lib/container_fetcher.py > > diff --git a/meta/classes/dpkg-base.bbclass b/meta/classes/dpkg-base.bbclass > index 789d6c74..d90b32a9 100644 > --- a/meta/classes/dpkg-base.bbclass > +++ b/meta/classes/dpkg-base.bbclass > @@ -98,6 +98,12 @@ python() { > if len(d.getVar('SRC_APT').strip()) > 0: > bb.build.addtask('apt_unpack', 'do_patch', '', d) > bb.build.addtask('cleanall_apt', 'do_cleanall', '', d) > + > + # container docker fetcher > + import container_fetcher > + from bb.fetch2 import methods > + > + methods.append(container_fetcher.Container()) > } > > do_apt_fetch() { > diff --git a/meta/lib/container_fetcher.py b/meta/lib/container_fetcher.py > new file mode 100644 > index 00000000..0d659154 > --- /dev/null > +++ b/meta/lib/container_fetcher.py > @@ -0,0 +1,86 @@ > +# This software is a part of ISAR. 
> +# Copyright (c) Siemens AG, 2024 > +# > +# SPDX-License-Identifier: MIT > + > +import oe.path > +import os > +import tempfile > +from bb.fetch2 import FetchMethod > +from bb.fetch2 import logger > +from bb.fetch2 import MissingChecksumEvent > +from bb.fetch2 import NoChecksumError > +from bb.fetch2 import runfetchcmd > + > +class Container(FetchMethod): > + def supports(self, ud, d): > + return ud.type in ['docker'] > + > + def urldata_init(self, ud, d): > + ud.tag = "latest" > + if "tag" in ud.parm: > + ud.tag = ud.parm["tag"] > + > + ud.digest = None > + if "digest" in ud.parm: > + ud.digest = ud.parm["digest"] > + > + ud.arch = d.getVar('PACKAGE_ARCH') > + ud.variant = None > + if ud.arch == "armhf": > + ud.arch = "arm" > + ud.variant = "v7" > + elif ud.arch == "armel": > + ud.arch = "arm" > + ud.variant = "v6" > + > + ud.container_name = ud.host + (ud.path if ud.path != "/" else "") > + ud.container_src = ud.container_name + \ > + ("@" + ud.digest if ud.digest else ":" + ud.tag) > + ud.localname = ud.container_name.replace('/', '.') > + ud.localfile = "container-images/" + ud.arch + "/" + \ > + (ud.variant + "/" if ud.variant else "") + ud.localname + \ > + "_" + (ud.digest.replace(":", "-") if ud.digest else ud.tag) + \ > + ".zst" > + > + def download(self, ud, d): > + tarball = ud.localfile[:-len('.zst')] > + with tempfile.TemporaryDirectory(dir=d.getVar('DL_DIR')) as tmpdir: > + # Take a two steps for downloading into a docker archive because > + # not all source may have the required Docker schema 2 manifest. 
> + runfetchcmd("skopeo copy --preserve-digests " + \ > + f"--override-arch {ud.arch} " + \ > + (f"--override-variant {ud.variant} " if ud.variant else "") + \ > + f"docker://{ud.container_src} dir:{tmpdir}", d) > + runfetchcmd(f"skopeo copy dir:{tmpdir} " + \ > + f"docker-archive:{tarball}:{ud.container_name}:{ud.tag}", d) > + zstd_defaults = d.getVar('ZSTD_DEFAULTS') > + runfetchcmd(f"zstd -f --rm {zstd_defaults} {tarball}", d) do_fetch doesn't get triggered if "ZSTD_LEVEL" changes, but it should, since the output zst file changes with a different ZSTD_LEVEL. Even adding a do_fetch[vardeps] += "ZSTD_DEFAULTS" doesn't lead to the desired behavior: When ZSTD_LEVEL changes, the fetch task is run but does not actually fetch anything and immediately proceeds with do_unpack. More precisely, the download() method of the Container() class is not getting executed, maybe due to some logic in the upper/surrounding Fetch() class. That would be fine if the compression task didn't implement the compression... Another thing to mention is that the whole fetch task is run again only if the compression changes. Skopeo seems to overwrite already downloaded layers rather than skipping them, which means that changing the compression also means a complete new download process. Maybe the unpack() task for packing and hardlinking the images would be better, even though it sounds strange to put a compression task into the unpack task. 
Benedikt > + > + if ud.digest: > + return > + > + checksum = bb.utils.sha256_file(ud.localpath + "/manifest.json") > + checksum_line = f"SRC_URI = \"{ud.url};digest=sha256:{checksum}\"" > + > + strict = d.getVar("BB_STRICT_CHECKSUM") or "0" > + > + # If strict checking enabled and neither sum defined, raise error > + if strict == "1": > + raise NoChecksumError(checksum_line) > + > + checksum_event = {"sha256sum": checksum} > + bb.event.fire(MissingChecksumEvent(ud.url, **checksum_event), d) > + > + if strict == "ignore": > + return > + > + # Log missing digest so user can more easily add it > + logger.warning( > + f"Missing checksum for '{ud.localpath}', consider using this " \ > + f"SRC_URI in the recipe:\n{checksum_line}") > + > + def unpack(self, ud, rootdir, d): > + image_file = ud.localname + ":" + ud.tag + ".zst" > + oe.path.remove(rootdir + "/" + image_file) > + oe.path.copyhardlink(ud.localpath, rootdir + "/" + image_file)
On Thu, 2024-07-25 at 10:48 +0000, 'Niedermayr, BENEDIKT' via isar-users wrote: > On Fri, 2024-07-19 at 18:38 +0200, Jan Kiszka wrote: > > From: Jan Kiszka <jan.kiszka@siemens.com> > > > > This bitbake fetcher allows to pull container images from registries, > > store them in the download cache and transfer them into the workdir of > > recipes requesting the image. The format of the URL is > > > > docker://[<host>/]<image>;digest=sha256:...[;tag=<tag>] > > > > Fetching without digest is supported but will cause a warning, just like > > downloading via wget without a checksum. If tag is left out, "latest" is > > used. > > > > In case a multi-arch image is specified, the fetcher will only pull for > > the package architecture of the requesting recipe. The image is stored > > compressed in docker-archive format and, wherever possible, hard-linked > > from DL_DIR to WORKDIR. Future versions may also introduce full > > unpacking of the fetched container layers in workdir if use cases come up. 
> > > > Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com> > > --- > > meta/classes/dpkg-base.bbclass | 6 +++ > > meta/lib/container_fetcher.py | 86 ++++++++++++++++++++++++++++++++++ > > 2 files changed, 92 insertions(+) > > create mode 100644 meta/lib/container_fetcher.py > > > > diff --git a/meta/classes/dpkg-base.bbclass b/meta/classes/dpkg-base.bbclass > > index 789d6c74..d90b32a9 100644 > > --- a/meta/classes/dpkg-base.bbclass > > +++ b/meta/classes/dpkg-base.bbclass > > @@ -98,6 +98,12 @@ python() { > > if len(d.getVar('SRC_APT').strip()) > 0: > > bb.build.addtask('apt_unpack', 'do_patch', '', d) > > bb.build.addtask('cleanall_apt', 'do_cleanall', '', d) > > + > > + # container docker fetcher > > + import container_fetcher > > + from bb.fetch2 import methods > > + > > + methods.append(container_fetcher.Container()) > > } > > > > do_apt_fetch() { > > diff --git a/meta/lib/container_fetcher.py b/meta/lib/container_fetcher.py > > new file mode 100644 > > index 00000000..0d659154 > > --- /dev/null > > +++ b/meta/lib/container_fetcher.py > > @@ -0,0 +1,86 @@ > > +# This software is a part of ISAR. 
> > +# Copyright (c) Siemens AG, 2024 > > +# > > +# SPDX-License-Identifier: MIT > > + > > +import oe.path > > +import os > > +import tempfile > > +from bb.fetch2 import FetchMethod > > +from bb.fetch2 import logger > > +from bb.fetch2 import MissingChecksumEvent > > +from bb.fetch2 import NoChecksumError > > +from bb.fetch2 import runfetchcmd > > + > > +class Container(FetchMethod): > > + def supports(self, ud, d): > > + return ud.type in ['docker'] > > + > > + def urldata_init(self, ud, d): > > + ud.tag = "latest" > > + if "tag" in ud.parm: > > + ud.tag = ud.parm["tag"] > > + > > + ud.digest = None > > + if "digest" in ud.parm: > > + ud.digest = ud.parm["digest"] > > + > > + ud.arch = d.getVar('PACKAGE_ARCH') > > + ud.variant = None > > + if ud.arch == "armhf": > > + ud.arch = "arm" > > + ud.variant = "v7" > > + elif ud.arch == "armel": > > + ud.arch = "arm" > > + ud.variant = "v6" > > + > > + ud.container_name = ud.host + (ud.path if ud.path != "/" else "") > > + ud.container_src = ud.container_name + \ > > + ("@" + ud.digest if ud.digest else ":" + ud.tag) > > + ud.localname = ud.container_name.replace('/', '.') > > + ud.localfile = "container-images/" + ud.arch + "/" + \ > > + (ud.variant + "/" if ud.variant else "") + ud.localname + \ > > + "_" + (ud.digest.replace(":", "-") if ud.digest else ud.tag) + \ > > + ".zst" > > + > > + def download(self, ud, d): > > + tarball = ud.localfile[:-len('.zst')] > > + with tempfile.TemporaryDirectory(dir=d.getVar('DL_DIR')) as tmpdir: > > + # Take a two steps for downloading into a docker archive because > > + # not all source may have the required Docker schema 2 manifest. 
> > + runfetchcmd("skopeo copy --preserve-digests " + \ > > + f"--override-arch {ud.arch} " + \ > > + (f"--override-variant {ud.variant} " if ud.variant else "") + \ > > + f"docker://{ud.container_src} dir:{tmpdir}", d) > > + runfetchcmd(f"skopeo copy dir:{tmpdir} " + \ > > + f"docker-archive:{tarball}:{ud.container_name}:{ud.tag}", d) > > + zstd_defaults = d.getVar('ZSTD_DEFAULTS') > > + runfetchcmd(f"zstd -f --rm {zstd_defaults} {tarball}", d) > > do_fetch doesn't get triggered if "ZSTD_LEVEL" changes, but it should, since the > output zst file changes with a different ZSTD_LEVEL. > > Even adding a do_fetch[vardeps] += "ZSTD_DEFAULTS" doesn't lead to the desired behavior: > > When ZSTD_LEVEL changes, the fetch task is run but does not actually fetch anything and > immediately proceeds with do_unpack. More precisely, the download() Method of the Container() > class is not getting executed, maybe due to some logic in the upper/surrounding Fetch() class. > That would be fine if the compression task didn't implement the compression... Sorry wrong wording: That would be fine if the FETCH task didn't implement the compression... Benedikt > > Another thing to mention is that he whole fetch task is run again only if the compression changes. > Skopeo seems to override already downloaded layers rather than skipping them, which means that > changing the compression also means a complete new download process. > > Maybe the unpack() task for packing and hardlinking the images would be better, even though it > sounds strange to put an compression task into the unpack task. 
> > Benedikt > > > + > > + if ud.digest: > > + return > > + > > + checksum = bb.utils.sha256_file(ud.localpath + "/manifest.json") > > + checksum_line = f"SRC_URI = \"{ud.url};digest=sha256:{checksum}\"" > > + > > + strict = d.getVar("BB_STRICT_CHECKSUM") or "0" > > + > > + # If strict checking enabled and neither sum defined, raise error > > + if strict == "1": > > + raise NoChecksumError(checksum_line) > > + > > + checksum_event = {"sha256sum": checksum} > > + bb.event.fire(MissingChecksumEvent(ud.url, **checksum_event), d) > > + > > + if strict == "ignore": > > + return > > + > > + # Log missing digest so user can more easily add it > > + logger.warning( > > + f"Missing checksum for '{ud.localpath}', consider using this " \ > > + f"SRC_URI in the recipe:\n{checksum_line}") > > + > > + def unpack(self, ud, rootdir, d): > > + image_file = ud.localname + ":" + ud.tag + ".zst" > > + oe.path.remove(rootdir + "/" + image_file) > > + oe.path.copyhardlink(ud.localpath, rootdir + "/" + image_file) >
diff --git a/meta/classes/dpkg-base.bbclass b/meta/classes/dpkg-base.bbclass index 789d6c74..d90b32a9 100644 --- a/meta/classes/dpkg-base.bbclass +++ b/meta/classes/dpkg-base.bbclass @@ -98,6 +98,12 @@ python() { if len(d.getVar('SRC_APT').strip()) > 0: bb.build.addtask('apt_unpack', 'do_patch', '', d) bb.build.addtask('cleanall_apt', 'do_cleanall', '', d) + + # container docker fetcher + import container_fetcher + from bb.fetch2 import methods + + methods.append(container_fetcher.Container()) } do_apt_fetch() { diff --git a/meta/lib/container_fetcher.py b/meta/lib/container_fetcher.py new file mode 100644 index 00000000..0d659154 --- /dev/null +++ b/meta/lib/container_fetcher.py @@ -0,0 +1,86 @@ +# This software is a part of ISAR. +# Copyright (c) Siemens AG, 2024 +# +# SPDX-License-Identifier: MIT + +import oe.path +import os +import tempfile +from bb.fetch2 import FetchMethod +from bb.fetch2 import logger +from bb.fetch2 import MissingChecksumEvent +from bb.fetch2 import NoChecksumError +from bb.fetch2 import runfetchcmd + +class Container(FetchMethod): + def supports(self, ud, d): + return ud.type in ['docker'] + + def urldata_init(self, ud, d): + ud.tag = "latest" + if "tag" in ud.parm: + ud.tag = ud.parm["tag"] + + ud.digest = None + if "digest" in ud.parm: + ud.digest = ud.parm["digest"] + + ud.arch = d.getVar('PACKAGE_ARCH') + ud.variant = None + if ud.arch == "armhf": + ud.arch = "arm" + ud.variant = "v7" + elif ud.arch == "armel": + ud.arch = "arm" + ud.variant = "v6" + + ud.container_name = ud.host + (ud.path if ud.path != "/" else "") + ud.container_src = ud.container_name + \ + ("@" + ud.digest if ud.digest else ":" + ud.tag) + ud.localname = ud.container_name.replace('/', '.') + ud.localfile = "container-images/" + ud.arch + "/" + \ + (ud.variant + "/" if ud.variant else "") + ud.localname + \ + "_" + (ud.digest.replace(":", "-") if ud.digest else ud.tag) + \ + ".zst" + + def download(self, ud, d): + tarball = ud.localfile[:-len('.zst')] + with 
tempfile.TemporaryDirectory(dir=d.getVar('DL_DIR')) as tmpdir: + # Take a two steps for downloading into a docker archive because + # not all source may have the required Docker schema 2 manifest. + runfetchcmd("skopeo copy --preserve-digests " + \ + f"--override-arch {ud.arch} " + \ + (f"--override-variant {ud.variant} " if ud.variant else "") + \ + f"docker://{ud.container_src} dir:{tmpdir}", d) + runfetchcmd(f"skopeo copy dir:{tmpdir} " + \ + f"docker-archive:{tarball}:{ud.container_name}:{ud.tag}", d) + zstd_defaults = d.getVar('ZSTD_DEFAULTS') + runfetchcmd(f"zstd -f --rm {zstd_defaults} {tarball}", d) + + if ud.digest: + return + + checksum = bb.utils.sha256_file(ud.localpath + "/manifest.json") + checksum_line = f"SRC_URI = \"{ud.url};digest=sha256:{checksum}\"" + + strict = d.getVar("BB_STRICT_CHECKSUM") or "0" + + # If strict checking enabled and neither sum defined, raise error + if strict == "1": + raise NoChecksumError(checksum_line) + + checksum_event = {"sha256sum": checksum} + bb.event.fire(MissingChecksumEvent(ud.url, **checksum_event), d) + + if strict == "ignore": + return + + # Log missing digest so user can more easily add it + logger.warning( + f"Missing checksum for '{ud.localpath}', consider using this " \ + f"SRC_URI in the recipe:\n{checksum_line}") + + def unpack(self, ud, rootdir, d): + image_file = ud.localname + ":" + ud.tag + ".zst" + oe.path.remove(rootdir + "/" + image_file) + oe.path.copyhardlink(ud.localpath, rootdir + "/" + image_file)