Message ID | b0493bec83353990199120a5fcf43676e995bba8.1721139489.git.jan.kiszka@siemens.com |
---|---|
State | Superseded, archived |
Headers | show |
Series | Introduce container fetcher and pre-loader | expand |
On Tue, 2024-07-16 at 16:18 +0200, Jan Kiszka wrote: > From: Jan Kiszka <jan.kiszka@siemens.com> > > This bitbake fetcher allows to pull container images from registries, > store them in the download cache and transfer them into the workdir > of > recipes requesting the image. The format of the URL is > > docker://[<host>/]<image>;digest=sha256:...[;tag=<tag>] > > Fetching without digest is supported but will cause a warning, just > like > downloading via wget without a checksum. This is fine, as long as the upstream artifact is expected to be stable. > > If tag is left out, "latest" is > used. The tag should be mandatory and it should be clear that - even without digest - the artifact needs to be stable. Floating tags create all kinds of issues (w.r.t. the sstate cache and reproducible builds), so I vote for just not allowing this. > > The fetcher will try to pull all available variants of a multi-arch > image. If this is not needed, you can also directly specify the image > digest of a specific architecture. In most cases this does not make sense. I propose to always limit the fetching to the current architecture. For that, we either need a mapping between the debian architecture and the OCI architectures (what is specified in the application/vnd.oci.image.index.v1+json manifest), or we simply don't support index manifests at all and force people to use image manifests (application/vnd.oci.image.manifest.v1+json). Felix > > Future versions may also introduce full unpacking of the fetched > container layers in workdir if use cases come up. 
> > Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com> > --- > meta/classes/dpkg-base.bbclass | 6 +++ > meta/lib/container_fetcher.py | 90 > ++++++++++++++++++++++++++++++++++ > 2 files changed, 96 insertions(+) > create mode 100644 meta/lib/container_fetcher.py > > diff --git a/meta/classes/dpkg-base.bbclass b/meta/classes/dpkg- > base.bbclass > index 789d6c74..d90b32a9 100644 > --- a/meta/classes/dpkg-base.bbclass > +++ b/meta/classes/dpkg-base.bbclass > @@ -98,6 +98,12 @@ python() { > if len(d.getVar('SRC_APT').strip()) > 0: > bb.build.addtask('apt_unpack', 'do_patch', '', d) > bb.build.addtask('cleanall_apt', 'do_cleanall', '', d) > + > + # container docker fetcher > + import container_fetcher > + from bb.fetch2 import methods > + > + methods.append(container_fetcher.Container()) > } > > do_apt_fetch() { > diff --git a/meta/lib/container_fetcher.py > b/meta/lib/container_fetcher.py > new file mode 100644 > index 00000000..8513e246 > --- /dev/null > +++ b/meta/lib/container_fetcher.py > @@ -0,0 +1,90 @@ > +# This software is a part of ISAR. 
> +# Copyright (c) Siemens AG, 2024 > +# > +# SPDX-License-Identifier: MIT > + > +import os > +import re > +from bb.fetch2 import FetchMethod > +from bb.fetch2 import logger > +from bb.fetch2 import MissingChecksumEvent > +from bb.fetch2 import NoChecksumError > +from bb.fetch2 import runfetchcmd > +from bb.progress import LineFilterProgressHandler > + > +class SkopeoProgressHandler(LineFilterProgressHandler): > + def __init__(self, d): > + super(SkopeoProgressHandler, self).__init__(d) > + self._fire_progress(0) > + > + def writeline(self, line): > + match = re.findall(r'^Copying image .*\(([0-9]+/[0-9]+)\)$', > line) > + if match: > + state = match[0].split('/') > + progress = (int(state[0]) * 100) / int(state[1]) > + self.update(progress) > + return True > + > + > +class Container(FetchMethod): > + def supports(self, ud, d): > + return ud.type in ['docker'] > + > + def urldata_init(self, ud, d): > + ud.tag = "latest" > + if "tag" in ud.parm: > + ud.tag = ud.parm["tag"] > + > + ud.digest = None > + if "digest" in ud.parm: > + ud.digest = ud.parm["digest"] > + > + container_name = ud.host + (ud.path if ud.path != "/" else > "") > + ud.container_src = container_name + \ > + ("@" + ud.digest if ud.digest else ":" + ud.tag) > + ud.localname = container_name.replace('/', '.') > + ud.localfile = "container-images/" + ud.localname + \ > + "_" + (ud.digest.replace(":", "-") if ud.digest else > ud.tag) > + > + def download(self, ud, d): > + progresshandler = SkopeoProgressHandler(d) > + runfetchcmd(f"skopeo copy --preserve-digests --all > docker://{ud.container_src} dir:{ud.localfile}", > + d, log=progresshandler) > + > + if ud.digest: > + return > + > + checksum = bb.utils.sha256_file(ud.localpath + > "/manifest.json") > + checksum_line = f"SRC_URI = > \"{ud.url};digest=sha256:{checksum}\"" > + > + strict = d.getVar("BB_STRICT_CHECKSUM") or "0" > + > + # If strict checking enabled and neither sum defined, raise > error > + if strict == "1": > + raise 
NoChecksumError(checksum_line) > + > + checksum_event = {"sha256sum": checksum} > + bb.event.fire(MissingChecksumEvent(ud.url, > **checksum_event), d) > + > + if strict == "ignore": > + return > + > + # Log missing digest so user can more easily add it > + logger.warning( > + f"Missing checksum for '{ud.localpath}', consider using > this " \ > + f"SRC_URI in the recipe:\n{checksum_line}") > + > + def unpack(self, ud, rootdir, d): > + arch = d.getVar('PACKAGE_ARCH') > + variant_opt = "" > + if arch == "armhf": > + arch = "arm" > + variant_opt = "--override-variant v7" > + elif arch == "armel": > + arch = "arm" > + variant_opt = "--override-variant v6" > + runfetchcmd(f"skopeo --override-arch {arch} {variant_opt} " > \ > + f"copy dir:{ud.localpath} dir:{rootdir + '/' + > ud.localname}", d) > + > + def clean(self, ud, d): > + bb.utils.remove(ud.localpath, recurse=True)
On 17.07.24 13:50, Moessbauer, Felix (T CED OES-DE) wrote:
> On Tue, 2024-07-16 at 16:18 +0200, Jan Kiszka wrote:
>> From: Jan Kiszka <jan.kiszka@siemens.com>
>>
>> This bitbake fetcher allows to pull container images from registries,
>> store them in the download cache and transfer them into the workdir
>> of
>> recipes requesting the image. The format of the URL is
>>
>> docker://[<host>/]<image>;digest=sha256:...[;tag=<tag>]
>>
>> Fetching without digest is supported but will cause a warning, just
>> like
>> downloading via wget without a checksum.
>
> This is fine, as long as the upstream artifact is expected to be
> stable.
>
>>
>> If tag is left out, "latest" is
>> used.
>
> The tag should be mandatory and it should be clear that - even without
> digest - the artifact needs to be stable. Floating tags create all
> kinds of issues (w.r.t. the sstate cache and reproducible builds), so I
> vote for just not allowing this.

I cannot follow: Leaving out the tag has nothing to do with getting a stable image. It may just lead to something being tagged "latest" on the device that had a different tag (or none) in the registry.

>
>>
>> The fetcher will try to pull all available variants of a multi-arch
>> image. If this is not needed, you can also directly specify the image
>> digest of a specific architecture.
>
> In most cases this does not make sense. I propose to always limit the
> fetching to the current architecture. For that, we either need a
> mapping between the debian architecture and the OCI architectures (what
> is specified in the application/vnd.oci.image.index.v1+json manifest),
> or we simply don't support index manifests at all and force people to
> use image manifests (application/vnd.oci.image.manifest.v1+json).

Don't worry, I already have ideas of redesigning this into per-arch fetches. Background is using less different formats on the build system and, where possible, even hard-link between them to save space and time with larger images.

Jan
On Wed, 2024-07-17 at 18:02 +0200, Jan Kiszka wrote:
> On 17.07.24 13:50, Moessbauer, Felix (T CED OES-DE) wrote:
> > On Tue, 2024-07-16 at 16:18 +0200, Jan Kiszka wrote:
> > > From: Jan Kiszka <jan.kiszka@siemens.com>
> > >
> > > This bitbake fetcher allows to pull container images from
> > > registries,
> > > store them in the download cache and transfer them into the
> > > workdir
> > > of
> > > recipes requesting the image. The format of the URL is
> > >
> > > docker://[<host>/]<image>;digest=sha256:...[;tag=<tag>]
> > >
> > > Fetching without digest is supported but will cause a warning,
> > > just
> > > like
> > > downloading via wget without a checksum.
> >
> > This is fine, as long as the upstream artifact is expected to be
> > stable.
> >
> > >
> > > If tag is left out, "latest" is
> > > used.
> >
> > The tag should be mandatory and it should be clear that - even
> > without
> > digest - the artifact needs to be stable. Floating tags create all
> > kinds of issues (w.r.t. the sstate cache and reproducible builds),
> > so I
> > vote for just not allowing this.
>
> I cannot follow: Leaving out the tag has nothing to do with getting a
> stable image. It may just lead to something being tagged "latest" on
> the
> device that had a different tag (or none) in the registry.

Ok, that means the image is just tagged on the device. Then it should be fine.

Felix

>
> >
> > >
> > > The fetcher will try to pull all available variants of a multi-
> > > arch
> > > image. If this is not needed, you can also directly specify the
> > > image
> > > digest of a specific architecture.
> >
> > In most cases this does not make sense. I propose to always limit
> > the
> > fetching to the current architecture. For that, we either need a
> > mapping between the debian architecture and the OCI architectures
> > (what
> > is specified in the application/vnd.oci.image.index.v1+json
> > manifest),
> > or we simply don't support index manifests at all and force people
> > to
> > use image manifests (application/vnd.oci.image.manifest.v1+json).
>
> Don't worry, I already have ideas of redesigning this into per-arch
> fetches. Background is using less different formats on the build
> system
> and, where possible, even hard-link between them to save space and
> time
> with larger images.
>
> Jan
>
diff --git a/meta/classes/dpkg-base.bbclass b/meta/classes/dpkg-base.bbclass index 789d6c74..d90b32a9 100644 --- a/meta/classes/dpkg-base.bbclass +++ b/meta/classes/dpkg-base.bbclass @@ -98,6 +98,12 @@ python() { if len(d.getVar('SRC_APT').strip()) > 0: bb.build.addtask('apt_unpack', 'do_patch', '', d) bb.build.addtask('cleanall_apt', 'do_cleanall', '', d) + + # container docker fetcher + import container_fetcher + from bb.fetch2 import methods + + methods.append(container_fetcher.Container()) } do_apt_fetch() { diff --git a/meta/lib/container_fetcher.py b/meta/lib/container_fetcher.py new file mode 100644 index 00000000..8513e246 --- /dev/null +++ b/meta/lib/container_fetcher.py @@ -0,0 +1,90 @@ +# This software is a part of ISAR. +# Copyright (c) Siemens AG, 2024 +# +# SPDX-License-Identifier: MIT + +import os +import re +from bb.fetch2 import FetchMethod +from bb.fetch2 import logger +from bb.fetch2 import MissingChecksumEvent +from bb.fetch2 import NoChecksumError +from bb.fetch2 import runfetchcmd +from bb.progress import LineFilterProgressHandler + +class SkopeoProgressHandler(LineFilterProgressHandler): + def __init__(self, d): + super(SkopeoProgressHandler, self).__init__(d) + self._fire_progress(0) + + def writeline(self, line): + match = re.findall(r'^Copying image .*\(([0-9]+/[0-9]+)\)$', line) + if match: + state = match[0].split('/') + progress = (int(state[0]) * 100) / int(state[1]) + self.update(progress) + return True + + +class Container(FetchMethod): + def supports(self, ud, d): + return ud.type in ['docker'] + + def urldata_init(self, ud, d): + ud.tag = "latest" + if "tag" in ud.parm: + ud.tag = ud.parm["tag"] + + ud.digest = None + if "digest" in ud.parm: + ud.digest = ud.parm["digest"] + + container_name = ud.host + (ud.path if ud.path != "/" else "") + ud.container_src = container_name + \ + ("@" + ud.digest if ud.digest else ":" + ud.tag) + ud.localname = container_name.replace('/', '.') + ud.localfile = "container-images/" + ud.localname + \ 
+ "_" + (ud.digest.replace(":", "-") if ud.digest else ud.tag) + + def download(self, ud, d): + progresshandler = SkopeoProgressHandler(d) + runfetchcmd(f"skopeo copy --preserve-digests --all docker://{ud.container_src} dir:{ud.localfile}", + d, log=progresshandler) + + if ud.digest: + return + + checksum = bb.utils.sha256_file(ud.localpath + "/manifest.json") + checksum_line = f"SRC_URI = \"{ud.url};digest=sha256:{checksum}\"" + + strict = d.getVar("BB_STRICT_CHECKSUM") or "0" + + # If strict checking enabled and neither sum defined, raise error + if strict == "1": + raise NoChecksumError(checksum_line) + + checksum_event = {"sha256sum": checksum} + bb.event.fire(MissingChecksumEvent(ud.url, **checksum_event), d) + + if strict == "ignore": + return + + # Log missing digest so user can more easily add it + logger.warning( + f"Missing checksum for '{ud.localpath}', consider using this " \ + f"SRC_URI in the recipe:\n{checksum_line}") + + def unpack(self, ud, rootdir, d): + arch = d.getVar('PACKAGE_ARCH') + variant_opt = "" + if arch == "armhf": + arch = "arm" + variant_opt = "--override-variant v7" + elif arch == "armel": + arch = "arm" + variant_opt = "--override-variant v6" + runfetchcmd(f"skopeo --override-arch {arch} {variant_opt} " \ + f"copy dir:{ud.localpath} dir:{rootdir + '/' + ud.localname}", d) + + def clean(self, ud, d): + bb.utils.remove(ud.localpath, recurse=True)