[v4,1/5] Introduce fetcher from container registries

Message ID 82da88bf02bf928d8807bc93bfb5fcdeece1f558.1721407122.git.jan.kiszka@siemens.com
State Accepted, archived
Headers show
Series Introduce container fetcher and pre-loader | expand

Commit Message

Jan Kiszka July 19, 2024, 4:38 p.m. UTC
From: Jan Kiszka <jan.kiszka@siemens.com>

This bitbake fetcher allows to pull container images from registries,
store them in the download cache and transfer them into the workdir of
recipes requesting the image. The format of the URL is

docker://[<host>/]<image>;digest=sha256:...[;tag=<tag>]

Fetching without digest is supported but will cause a warning, just like
downloading via wget without a checksum. If tag is left out, "latest" is
used.

In case a multi-arch image is specified, the fetcher will only pull for
the package architecture of the requesting recipe. The image is stored
compressed in docker-archive format and, wherever possible, hard-linked
from DL_DIR to WORKDIR. Future versions may also introduce full
unpacking of the fetched container layers in workdir if use cases come up.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
---
 meta/classes/dpkg-base.bbclass |  6 +++
 meta/lib/container_fetcher.py  | 86 ++++++++++++++++++++++++++++++++++
 2 files changed, 92 insertions(+)
 create mode 100644 meta/lib/container_fetcher.py

Comments

Benedikt Niedermayr July 25, 2024, 10:48 a.m. UTC | #1
On Fri, 2024-07-19 at 18:38 +0200, Jan Kiszka wrote:
> From: Jan Kiszka <jan.kiszka@siemens.com>
> 
> This bitbake fetcher allows to pull container images from registries,
> store them in the download cache and transfer them into the workdir of
> recipes requesting the image. The format of the URL is
> 
> docker://[<host>/]<image>;digest=sha256:...[;tag=<tag>]
> 
> Fetching without digest is supported but will cause a warning, just like
> downloading via wget without a checksum. If tag is left out, "latest" is
> used.
> 
> In case a multi-arch image is specified, the fetcher will only pull for
> the package architecture of the requesting recipe. The image is stored
> compressed in docker-archive format and, wherever possible, hard-linked
> from DL_DIR to WORKDIR. Future versions may also introduce full
> unpacking of the fetched container layers in workdir if use cases come up.
> 
> Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
> ---
>  meta/classes/dpkg-base.bbclass |  6 +++
>  meta/lib/container_fetcher.py  | 86 ++++++++++++++++++++++++++++++++++
>  2 files changed, 92 insertions(+)
>  create mode 100644 meta/lib/container_fetcher.py
> 
> diff --git a/meta/classes/dpkg-base.bbclass b/meta/classes/dpkg-base.bbclass
> index 789d6c74..d90b32a9 100644
> --- a/meta/classes/dpkg-base.bbclass
> +++ b/meta/classes/dpkg-base.bbclass
> @@ -98,6 +98,12 @@ python() {
>      if len(d.getVar('SRC_APT').strip()) > 0:
>          bb.build.addtask('apt_unpack', 'do_patch', '', d)
>          bb.build.addtask('cleanall_apt', 'do_cleanall', '', d)
> +
> +    # container docker fetcher
> +    import container_fetcher
> +    from bb.fetch2 import methods
> +
> +    methods.append(container_fetcher.Container())
>  }
>  
>  do_apt_fetch() {
> diff --git a/meta/lib/container_fetcher.py b/meta/lib/container_fetcher.py
> new file mode 100644
> index 00000000..0d659154
> --- /dev/null
> +++ b/meta/lib/container_fetcher.py
> @@ -0,0 +1,86 @@
> +# This software is a part of ISAR.
> +# Copyright (c) Siemens AG, 2024
> +#
> +# SPDX-License-Identifier: MIT
> +
> +import oe.path
> +import os
> +import tempfile
> +from   bb.fetch2 import FetchMethod
> +from   bb.fetch2 import logger
> +from   bb.fetch2 import MissingChecksumEvent
> +from   bb.fetch2 import NoChecksumError
> +from   bb.fetch2 import runfetchcmd
> +
> +class Container(FetchMethod):
> +    def supports(self, ud, d):
> +        return ud.type in ['docker']
> +
> +    def urldata_init(self, ud, d):
> +        ud.tag = "latest"
> +        if "tag" in ud.parm:
> +            ud.tag = ud.parm["tag"]
> +
> +        ud.digest = None
> +        if "digest" in ud.parm:
> +            ud.digest = ud.parm["digest"]
> +
> +        ud.arch = d.getVar('PACKAGE_ARCH')
> +        ud.variant = None
> +        if ud.arch == "armhf":
> +            ud.arch = "arm"
> +            ud.variant = "v7"
> +        elif ud.arch == "armel":
> +            ud.arch = "arm"
> +            ud.variant = "v6"
> +
> +        ud.container_name = ud.host + (ud.path if ud.path != "/" else "")
> +        ud.container_src = ud.container_name + \
> +            ("@" + ud.digest if ud.digest else ":" + ud.tag)
> +        ud.localname = ud.container_name.replace('/', '.')
> +        ud.localfile = "container-images/" + ud.arch + "/" + \
> +            (ud.variant + "/" if ud.variant else "") + ud.localname + \
> +            "_" + (ud.digest.replace(":", "-") if ud.digest else ud.tag) + \
> +            ".zst"
> +
> +    def download(self, ud, d):
> +        tarball = ud.localfile[:-len('.zst')]
> +        with tempfile.TemporaryDirectory(dir=d.getVar('DL_DIR')) as tmpdir:
> +            # Take a two steps for downloading into a docker archive because
> +            # not all source may have the required Docker schema 2 manifest.
> +            runfetchcmd("skopeo copy --preserve-digests " + \
> +                f"--override-arch {ud.arch} " + \
> +                (f"--override-variant {ud.variant} " if ud.variant else "") + \
> +                f"docker://{ud.container_src} dir:{tmpdir}", d)
> +            runfetchcmd(f"skopeo copy dir:{tmpdir} " + \
> +                f"docker-archive:{tarball}:{ud.container_name}:{ud.tag}", d)
> +        zstd_defaults = d.getVar('ZSTD_DEFAULTS')
> +        runfetchcmd(f"zstd -f --rm {zstd_defaults} {tarball}", d)

do_fetch doesn't get triggered if "ZSTD_LEVEL" changes, but it should, since the
output zst file changes with a different ZSTD_LEVEL.

Even adding a do_fetch[vardeps] += "ZSTD_DEFAULTS" doesn't lead to the desired behavior:

When ZSTD_LEVEL changes, the fetch task is run but does not actually fetch anything and
immediately proceeds with do_unpack. More precisely, the download() method of the Container
class is not executed, maybe due to some logic in the upper/surrounding Fetch() class.
That would be fine if the compression task didn't implement the compression...

Another thing to mention is that the whole fetch task is run again only if the compression changes.
Skopeo seems to overwrite already downloaded layers rather than skipping them, which means that
changing the compression also triggers a completely new download process.

Maybe using the unpack() task for packing and hardlinking the images would be better, even though it
sounds strange to put a compression task into the unpack task.
 
Benedikt

> +
> +        if ud.digest:
> +            return
> +
> +        checksum = bb.utils.sha256_file(ud.localpath + "/manifest.json")
> +        checksum_line = f"SRC_URI = \"{ud.url};digest=sha256:{checksum}\""
> +
> +        strict = d.getVar("BB_STRICT_CHECKSUM") or "0"
> +
> +        # If strict checking enabled and neither sum defined, raise error
> +        if strict == "1":
> +            raise NoChecksumError(checksum_line)
> +
> +        checksum_event = {"sha256sum": checksum}
> +        bb.event.fire(MissingChecksumEvent(ud.url, **checksum_event), d)
> +
> +        if strict == "ignore":
> +            return
> +
> +        # Log missing digest so user can more easily add it
> +        logger.warning(
> +            f"Missing checksum for '{ud.localpath}', consider using this " \
> +            f"SRC_URI in the recipe:\n{checksum_line}")
> +
> +    def unpack(self, ud, rootdir, d):
> +        image_file = ud.localname + ":" + ud.tag + ".zst"
> +        oe.path.remove(rootdir + "/" + image_file)
> +        oe.path.copyhardlink(ud.localpath, rootdir + "/" + image_file)
Benedikt Niedermayr July 25, 2024, 11:10 a.m. UTC | #2
On Thu, 2024-07-25 at 10:48 +0000, 'Niedermayr, BENEDIKT' via isar-users wrote:
> On Fri, 2024-07-19 at 18:38 +0200, Jan Kiszka wrote:
> > From: Jan Kiszka <jan.kiszka@siemens.com>
> > 
> > This bitbake fetcher allows to pull container images from registries,
> > store them in the download cache and transfer them into the workdir of
> > recipes requesting the image. The format of the URL is
> > 
> > docker://[<host>/]<image>;digest=sha256:...[;tag=<tag>]
> > 
> > Fetching without digest is supported but will cause a warning, just like
> > downloading via wget without a checksum. If tag is left out, "latest" is
> > used.
> > 
> > In case a multi-arch image is specified, the fetcher will only pull for
> > the package architecture of the requesting recipe. The image is stored
> > compressed in docker-archive format and, wherever possible, hard-linked
> > from DL_DIR to WORKDIR. Future versions may also introduce full
> > unpacking of the fetched container layers in workdir if use cases come up.
> > 
> > Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
> > ---
> >  meta/classes/dpkg-base.bbclass |  6 +++
> >  meta/lib/container_fetcher.py  | 86 ++++++++++++++++++++++++++++++++++
> >  2 files changed, 92 insertions(+)
> >  create mode 100644 meta/lib/container_fetcher.py
> > 
> > diff --git a/meta/classes/dpkg-base.bbclass b/meta/classes/dpkg-base.bbclass
> > index 789d6c74..d90b32a9 100644
> > --- a/meta/classes/dpkg-base.bbclass
> > +++ b/meta/classes/dpkg-base.bbclass
> > @@ -98,6 +98,12 @@ python() {
> >      if len(d.getVar('SRC_APT').strip()) > 0:
> >          bb.build.addtask('apt_unpack', 'do_patch', '', d)
> >          bb.build.addtask('cleanall_apt', 'do_cleanall', '', d)
> > +
> > +    # container docker fetcher
> > +    import container_fetcher
> > +    from bb.fetch2 import methods
> > +
> > +    methods.append(container_fetcher.Container())
> >  }
> >  
> >  do_apt_fetch() {
> > diff --git a/meta/lib/container_fetcher.py b/meta/lib/container_fetcher.py
> > new file mode 100644
> > index 00000000..0d659154
> > --- /dev/null
> > +++ b/meta/lib/container_fetcher.py
> > @@ -0,0 +1,86 @@
> > +# This software is a part of ISAR.
> > +# Copyright (c) Siemens AG, 2024
> > +#
> > +# SPDX-License-Identifier: MIT
> > +
> > +import oe.path
> > +import os
> > +import tempfile
> > +from   bb.fetch2 import FetchMethod
> > +from   bb.fetch2 import logger
> > +from   bb.fetch2 import MissingChecksumEvent
> > +from   bb.fetch2 import NoChecksumError
> > +from   bb.fetch2 import runfetchcmd
> > +
> > +class Container(FetchMethod):
> > +    def supports(self, ud, d):
> > +        return ud.type in ['docker']
> > +
> > +    def urldata_init(self, ud, d):
> > +        ud.tag = "latest"
> > +        if "tag" in ud.parm:
> > +            ud.tag = ud.parm["tag"]
> > +
> > +        ud.digest = None
> > +        if "digest" in ud.parm:
> > +            ud.digest = ud.parm["digest"]
> > +
> > +        ud.arch = d.getVar('PACKAGE_ARCH')
> > +        ud.variant = None
> > +        if ud.arch == "armhf":
> > +            ud.arch = "arm"
> > +            ud.variant = "v7"
> > +        elif ud.arch == "armel":
> > +            ud.arch = "arm"
> > +            ud.variant = "v6"
> > +
> > +        ud.container_name = ud.host + (ud.path if ud.path != "/" else "")
> > +        ud.container_src = ud.container_name + \
> > +            ("@" + ud.digest if ud.digest else ":" + ud.tag)
> > +        ud.localname = ud.container_name.replace('/', '.')
> > +        ud.localfile = "container-images/" + ud.arch + "/" + \
> > +            (ud.variant + "/" if ud.variant else "") + ud.localname + \
> > +            "_" + (ud.digest.replace(":", "-") if ud.digest else ud.tag) + \
> > +            ".zst"
> > +
> > +    def download(self, ud, d):
> > +        tarball = ud.localfile[:-len('.zst')]
> > +        with tempfile.TemporaryDirectory(dir=d.getVar('DL_DIR')) as tmpdir:
> > +            # Take a two steps for downloading into a docker archive because
> > +            # not all source may have the required Docker schema 2 manifest.
> > +            runfetchcmd("skopeo copy --preserve-digests " + \
> > +                f"--override-arch {ud.arch} " + \
> > +                (f"--override-variant {ud.variant} " if ud.variant else "") + \
> > +                f"docker://{ud.container_src} dir:{tmpdir}", d)
> > +            runfetchcmd(f"skopeo copy dir:{tmpdir} " + \
> > +                f"docker-archive:{tarball}:{ud.container_name}:{ud.tag}", d)
> > +        zstd_defaults = d.getVar('ZSTD_DEFAULTS')
> > +        runfetchcmd(f"zstd -f --rm {zstd_defaults} {tarball}", d)
> 
> do_fetch doesn't get triggered if "ZSTD_LEVEL" changes, but it should, since the
> output zst file changes with a different ZSTD_LEVEL.
> 
> Even adding a do_fetch[vardeps] += "ZSTD_DEFAULTS" doesn't lead to the desired behavior:
> 
> When ZSTD_LEVEL changes, the fetch task is run but does not actually fetch anything and
> immediately proceeds with do_unpack. More precisely, the download() method of the Container
> class is not executed, maybe due to some logic in the upper/surrounding Fetch() class.
> That would be fine if the compression task didn't implement the compression...
Sorry wrong wording:

That would be fine if the FETCH task didn't implement the compression...

Benedikt

> 
> Another thing to mention is that the whole fetch task is run again only if the compression changes.
> Skopeo seems to overwrite already downloaded layers rather than skipping them, which means that
> changing the compression also triggers a completely new download process.
> 
> Maybe using the unpack() task for packing and hardlinking the images would be better, even though it
> sounds strange to put a compression task into the unpack task.
>  
> Benedikt
> 
> > +
> > +        if ud.digest:
> > +            return
> > +
> > +        checksum = bb.utils.sha256_file(ud.localpath + "/manifest.json")
> > +        checksum_line = f"SRC_URI = \"{ud.url};digest=sha256:{checksum}\""
> > +
> > +        strict = d.getVar("BB_STRICT_CHECKSUM") or "0"
> > +
> > +        # If strict checking enabled and neither sum defined, raise error
> > +        if strict == "1":
> > +            raise NoChecksumError(checksum_line)
> > +
> > +        checksum_event = {"sha256sum": checksum}
> > +        bb.event.fire(MissingChecksumEvent(ud.url, **checksum_event), d)
> > +
> > +        if strict == "ignore":
> > +            return
> > +
> > +        # Log missing digest so user can more easily add it
> > +        logger.warning(
> > +            f"Missing checksum for '{ud.localpath}', consider using this " \
> > +            f"SRC_URI in the recipe:\n{checksum_line}")
> > +
> > +    def unpack(self, ud, rootdir, d):
> > +        image_file = ud.localname + ":" + ud.tag + ".zst"
> > +        oe.path.remove(rootdir + "/" + image_file)
> > +        oe.path.copyhardlink(ud.localpath, rootdir + "/" + image_file)
>

Patch

diff --git a/meta/classes/dpkg-base.bbclass b/meta/classes/dpkg-base.bbclass
index 789d6c74..d90b32a9 100644
--- a/meta/classes/dpkg-base.bbclass
+++ b/meta/classes/dpkg-base.bbclass
@@ -98,6 +98,12 @@  python() {
     if len(d.getVar('SRC_APT').strip()) > 0:
         bb.build.addtask('apt_unpack', 'do_patch', '', d)
         bb.build.addtask('cleanall_apt', 'do_cleanall', '', d)
+
+    # container docker fetcher
+    import container_fetcher
+    from bb.fetch2 import methods
+
+    methods.append(container_fetcher.Container())
 }
 
 do_apt_fetch() {
diff --git a/meta/lib/container_fetcher.py b/meta/lib/container_fetcher.py
new file mode 100644
index 00000000..0d659154
--- /dev/null
+++ b/meta/lib/container_fetcher.py
@@ -0,0 +1,86 @@ 
+# This software is a part of ISAR.
+# Copyright (c) Siemens AG, 2024
+#
+# SPDX-License-Identifier: MIT
+
+import oe.path
+import os
+import tempfile
+from   bb.fetch2 import FetchMethod
+from   bb.fetch2 import logger
+from   bb.fetch2 import MissingChecksumEvent
+from   bb.fetch2 import NoChecksumError
+from   bb.fetch2 import runfetchcmd
+
+class Container(FetchMethod):
+    def supports(self, ud, d):
+        return ud.type in ['docker']
+
+    def urldata_init(self, ud, d):
+        ud.tag = "latest"
+        if "tag" in ud.parm:
+            ud.tag = ud.parm["tag"]
+
+        ud.digest = None
+        if "digest" in ud.parm:
+            ud.digest = ud.parm["digest"]
+
+        ud.arch = d.getVar('PACKAGE_ARCH')
+        ud.variant = None
+        if ud.arch == "armhf":
+            ud.arch = "arm"
+            ud.variant = "v7"
+        elif ud.arch == "armel":
+            ud.arch = "arm"
+            ud.variant = "v6"
+
+        ud.container_name = ud.host + (ud.path if ud.path != "/" else "")
+        ud.container_src = ud.container_name + \
+            ("@" + ud.digest if ud.digest else ":" + ud.tag)
+        ud.localname = ud.container_name.replace('/', '.')
+        ud.localfile = "container-images/" + ud.arch + "/" + \
+            (ud.variant + "/" if ud.variant else "") + ud.localname + \
+            "_" + (ud.digest.replace(":", "-") if ud.digest else ud.tag) + \
+            ".zst"
+
+    def download(self, ud, d):
+        tarball = ud.localfile[:-len('.zst')]
+        with tempfile.TemporaryDirectory(dir=d.getVar('DL_DIR')) as tmpdir:
+            # Take a two steps for downloading into a docker archive because
+            # not all source may have the required Docker schema 2 manifest.
+            runfetchcmd("skopeo copy --preserve-digests " + \
+                f"--override-arch {ud.arch} " + \
+                (f"--override-variant {ud.variant} " if ud.variant else "") + \
+                f"docker://{ud.container_src} dir:{tmpdir}", d)
+            runfetchcmd(f"skopeo copy dir:{tmpdir} " + \
+                f"docker-archive:{tarball}:{ud.container_name}:{ud.tag}", d)
+        zstd_defaults = d.getVar('ZSTD_DEFAULTS')
+        runfetchcmd(f"zstd -f --rm {zstd_defaults} {tarball}", d)
+
+        if ud.digest:
+            return
+
+        checksum = bb.utils.sha256_file(ud.localpath + "/manifest.json")
+        checksum_line = f"SRC_URI = \"{ud.url};digest=sha256:{checksum}\""
+
+        strict = d.getVar("BB_STRICT_CHECKSUM") or "0"
+
+        # If strict checking enabled and neither sum defined, raise error
+        if strict == "1":
+            raise NoChecksumError(checksum_line)
+
+        checksum_event = {"sha256sum": checksum}
+        bb.event.fire(MissingChecksumEvent(ud.url, **checksum_event), d)
+
+        if strict == "ignore":
+            return
+
+        # Log missing digest so user can more easily add it
+        logger.warning(
+            f"Missing checksum for '{ud.localpath}', consider using this " \
+            f"SRC_URI in the recipe:\n{checksum_line}")
+
+    def unpack(self, ud, rootdir, d):
+        image_file = ud.localname + ":" + ud.tag + ".zst"
+        oe.path.remove(rootdir + "/" + image_file)
+        oe.path.copyhardlink(ud.localpath, rootdir + "/" + image_file)