fix(isar-sstate): also handle zst files

Message ID 20230210153434.1024604-1-adriaan.schmidt@siemens.com
State Accepted, archived
Headers show
Series fix(isar-sstate): also handle zst files | expand

Commit Message

Schmidt, Adriaan Feb. 10, 2023, 3:34 p.m. UTC
With bitbake 2.0, sstate artifacts have changed from tgz to tar.zst.
Our isar-sstate script needs to scan for those as well. The implementation
is backwards-compatible.

Signed-off-by: Adriaan Schmidt <adriaan.schmidt@siemens.com>
---
 scripts/isar-sstate | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

Comments

Henning Schild Feb. 10, 2023, 3:48 p.m. UTC | #1
Am Fri, 10 Feb 2023 16:34:34 +0100
schrieb Adriaan Schmidt <adriaan.schmidt@siemens.com>:

> With bitbake 2.0, sstate artifacts have changed from tgz to tar.zst.
> Our isar-sstate script needs to scan for those as well. The
> implementation is backwards-compatible.

I came here just wanting to make sure it will work for both, and yes it
does!

Good catch!

Henning

> Signed-off-by: Adriaan Schmidt <adriaan.schmidt@siemens.com>
> ---
>  scripts/isar-sstate | 24 ++++++++++++------------
>  1 file changed, 12 insertions(+), 12 deletions(-)
> 
> diff --git a/scripts/isar-sstate b/scripts/isar-sstate
> index 53d0541f..c14c2843 100755
> --- a/scripts/isar-sstate
> +++ b/scripts/isar-sstate
> @@ -40,7 +40,7 @@ followed by one of `w`, `d`, `h`, `m`, or `s` (for
> weeks, days, hours, minutes, seconds, respectively).
>  
>  `--max-age` specifies up to which age artifacts should be kept in
> the cache. -Anything older will be removed. Note that this only
> applies to the `.tgz` files +Anything older will be removed. Note
> that this only applies to the archive files containing the actual
> cached items, not the `.siginfo` files containing the cache metadata
> (signatures and hashes). To permit analysis of caching details using
> the `analyze` command, the siginfo @@ -576,7 +576,7 @@ def
> arguments(): '-v', '--verbose', default=False, action='store_true')
>      parser.add_argument(
>          '--max-age', type=str, default='1d',
> -        help="clean: remove tgz files older than MAX_AGE (a number
> followed by w|d|h|m|s)")
> +        help="clean: remove archive files older than MAX_AGE (a
> number followed by w|d|h|m|s)") parser.add_argument(
>          '--max-sig-age', type=str, default=None,
>          help="clean: remove siginfo files older than MAX_SIG_AGE
> (defaults to MAX_AGE)") @@ -664,21 +664,21 @@ def
> sstate_clean(target, max_age, max_sig_age, verbose, **kwargs): links
> = [f for f in all_files if f.islink] if links:
>          print(f"NOTE: we have links: {links}")
> -    tgz_files = [f for f in all_files if f.suffix == 'tgz']
> -    siginfo_files = [f for f in all_files if f.suffix ==
> 'tgz.siginfo']
> -    del_tgz_files = [f for f in tgz_files if f.age >=
> max_age_seconds]
> -    del_tgz_hashes = [f.hash for f in del_tgz_files]
> +    archive_files = [f for f in all_files if f.suffix in ['tgz',
> 'tar.zst']]
> +    siginfo_files = [f for f in all_files if f.suffix in
> ['tgz.siginfo', 'tar.zst.siginfo']]
> +    del_archive_files = [f for f in archive_files if f.age >=
> max_age_seconds]
> +    del_archive_hashes = [f.hash for f in del_archive_files]
>      del_siginfo_files = [f for f in siginfo_files if
> -                         f.age >= max_sig_age_seconds or f.hash in
> del_tgz_hashes]
> -    print(f"INFO: found {len(tgz_files)} tgz files,
> {len(del_tgz_files)} of which are older than {max_age}")
> +                         f.age >= max_sig_age_seconds or f.hash in
> del_archive_hashes]
> +    print(f"INFO: found {len(archive_files)} archive files,
> {len(del_archive_files)} of which are older than {max_age}")
> print(f"INFO: found {len(siginfo_files)} siginfo files,
> {len(del_siginfo_files)} of which "
> -          f"correspond to old tgz files or are older than
> {max_sig_age}")
> +          f"correspond to old archive files or are older than
> {max_sig_age}") 
> -    for f in del_tgz_files + del_siginfo_files:
> +    for f in del_archive_files + del_siginfo_files:
>          if verbose:
>              print(f"[DELETE] {f.path}")
>          target.delete(f.path)
> -    freed_gb = sum([x.size for x in del_tgz_files +
> del_siginfo_files]) / 1024.0 / 1024.0 / 1024.0
> +    freed_gb = sum([x.size for x in del_archive_files +
> del_siginfo_files]) / 1024.0 / 1024.0 / 1024.0 print(f"INFO: freed
> {freed_gb:.02f} GB") return 0
>  
> @@ -716,7 +716,7 @@ def sstate_info(target, verbose, **kwargs):
>      for k, entries in recipes.items():
>          print(f"Cache entries for {k}:")
>          for pn in entries:
> -            artifacts = [f for f in all_files if f.pn == pn and
> f.task == key_task[k] and f.suffix == 'tgz']
> +            artifacts = [f for f in all_files if f.pn == pn and
> f.task == key_task[k] and f.suffix in ['tgz', 'tar.zst']] print(f"  -
> {pn}: {len(artifacts)} entries") print("Other cache entries:")
>      for pn in others:
Henning Schild Feb. 10, 2023, 7:16 p.m. UTC | #2
ACK!

This fixes a bug in "next" that came with "bb2" and should be considered for a
"fast path".

The sstate testing in Isar might have to be improved to cover this script.

After we test that the sstate rebuild was faster, we could test that an sstate
clean with --max-age=1s gives us an "empty cache", i.e. "du -s" reports a small size.

Henning

Am Fri, 10 Feb 2023 16:34:34 +0100
schrieb Adriaan Schmidt <adriaan.schmidt@siemens.com>:

> With bitbake 2.0, sstate artifacts have changed from tgz to tar.zst.
> Our isar-sstate script needs to scan for those as well. The
> implementation is backwards-compatible.
> 
> Signed-off-by: Adriaan Schmidt <adriaan.schmidt@siemens.com>
> ---
>  scripts/isar-sstate | 24 ++++++++++++------------
>  1 file changed, 12 insertions(+), 12 deletions(-)
> 
> diff --git a/scripts/isar-sstate b/scripts/isar-sstate
> index 53d0541f..c14c2843 100755
> --- a/scripts/isar-sstate
> +++ b/scripts/isar-sstate
> @@ -40,7 +40,7 @@ followed by one of `w`, `d`, `h`, `m`, or `s` (for
> weeks, days, hours, minutes, seconds, respectively).
>  
>  `--max-age` specifies up to which age artifacts should be kept in
> the cache. -Anything older will be removed. Note that this only
> applies to the `.tgz` files +Anything older will be removed. Note
> that this only applies to the archive files containing the actual
> cached items, not the `.siginfo` files containing the cache metadata
> (signatures and hashes). To permit analysis of caching details using
> the `analyze` command, the siginfo @@ -576,7 +576,7 @@ def
> arguments(): '-v', '--verbose', default=False, action='store_true')
>      parser.add_argument(
>          '--max-age', type=str, default='1d',
> -        help="clean: remove tgz files older than MAX_AGE (a number
> followed by w|d|h|m|s)")
> +        help="clean: remove archive files older than MAX_AGE (a
> number followed by w|d|h|m|s)") parser.add_argument(
>          '--max-sig-age', type=str, default=None,
>          help="clean: remove siginfo files older than MAX_SIG_AGE
> (defaults to MAX_AGE)") @@ -664,21 +664,21 @@ def
> sstate_clean(target, max_age, max_sig_age, verbose, **kwargs): links
> = [f for f in all_files if f.islink] if links:
>          print(f"NOTE: we have links: {links}")
> -    tgz_files = [f for f in all_files if f.suffix == 'tgz']
> -    siginfo_files = [f for f in all_files if f.suffix ==
> 'tgz.siginfo']
> -    del_tgz_files = [f for f in tgz_files if f.age >=
> max_age_seconds]
> -    del_tgz_hashes = [f.hash for f in del_tgz_files]
> +    archive_files = [f for f in all_files if f.suffix in ['tgz',
> 'tar.zst']]
> +    siginfo_files = [f for f in all_files if f.suffix in
> ['tgz.siginfo', 'tar.zst.siginfo']]
> +    del_archive_files = [f for f in archive_files if f.age >=
> max_age_seconds]
> +    del_archive_hashes = [f.hash for f in del_archive_files]
>      del_siginfo_files = [f for f in siginfo_files if
> -                         f.age >= max_sig_age_seconds or f.hash in
> del_tgz_hashes]
> -    print(f"INFO: found {len(tgz_files)} tgz files,
> {len(del_tgz_files)} of which are older than {max_age}")
> +                         f.age >= max_sig_age_seconds or f.hash in
> del_archive_hashes]
> +    print(f"INFO: found {len(archive_files)} archive files,
> {len(del_archive_files)} of which are older than {max_age}")
> print(f"INFO: found {len(siginfo_files)} siginfo files,
> {len(del_siginfo_files)} of which "
> -          f"correspond to old tgz files or are older than
> {max_sig_age}")
> +          f"correspond to old archive files or are older than
> {max_sig_age}") 
> -    for f in del_tgz_files + del_siginfo_files:
> +    for f in del_archive_files + del_siginfo_files:
>          if verbose:
>              print(f"[DELETE] {f.path}")
>          target.delete(f.path)
> -    freed_gb = sum([x.size for x in del_tgz_files +
> del_siginfo_files]) / 1024.0 / 1024.0 / 1024.0
> +    freed_gb = sum([x.size for x in del_archive_files +
> del_siginfo_files]) / 1024.0 / 1024.0 / 1024.0 print(f"INFO: freed
> {freed_gb:.02f} GB") return 0
>  
> @@ -716,7 +716,7 @@ def sstate_info(target, verbose, **kwargs):
>      for k, entries in recipes.items():
>          print(f"Cache entries for {k}:")
>          for pn in entries:
> -            artifacts = [f for f in all_files if f.pn == pn and
> f.task == key_task[k] and f.suffix == 'tgz']
> +            artifacts = [f for f in all_files if f.pn == pn and
> f.task == key_task[k] and f.suffix in ['tgz', 'tar.zst']] print(f"  -
> {pn}: {len(artifacts)} entries") print("Other cache entries:")
>      for pn in others:
MOESSBAUER, Felix Feb. 11, 2023, 12:15 a.m. UTC | #3
ACK!

It would be great if we could merge this in a timely manner.
Currently, a lot of integration work for bitbake 2.0 is going on in
downstream layers. Having no way to clean the cache is a major pain
point (esp. in CIs, where the existing logic uses isar-sstate for
cleanup).

Felix

On Fri, 2023-02-10 at 16:34 +0100, Adriaan Schmidt wrote:
> With bitbake 2.0, sstate artifacts have changed from tgz to tar.zst.
> Our isar-sstate script needs to scan for those as well. The
> implementation
> is backwards-compatible.
> 
> Signed-off-by: Adriaan Schmidt <adriaan.schmidt@siemens.com>
> ---
>  scripts/isar-sstate | 24 ++++++++++++------------
>  1 file changed, 12 insertions(+), 12 deletions(-)
> 
> diff --git a/scripts/isar-sstate b/scripts/isar-sstate
> index 53d0541f..c14c2843 100755
> --- a/scripts/isar-sstate
> +++ b/scripts/isar-sstate
> @@ -40,7 +40,7 @@ followed by one of `w`, `d`, `h`, `m`, or `s` (for
> weeks, days, hours, minutes,
>  seconds, respectively).
>  
>  `--max-age` specifies up to which age artifacts should be kept in
> the cache.
> -Anything older will be removed. Note that this only applies to the
> `.tgz` files
> +Anything older will be removed. Note that this only applies to the
> archive files
>  containing the actual cached items, not the `.siginfo` files
> containing the
>  cache metadata (signatures and hashes).
>  To permit analysis of caching details using the `analyze` command,
> the siginfo
> @@ -576,7 +576,7 @@ def arguments():
>          '-v', '--verbose', default=False, action='store_true')
>      parser.add_argument(
>          '--max-age', type=str, default='1d',
> -        help="clean: remove tgz files older than MAX_AGE (a number
> followed by w|d|h|m|s)")
> +        help="clean: remove archive files older than MAX_AGE (a
> number followed by w|d|h|m|s)")
>      parser.add_argument(
>          '--max-sig-age', type=str, default=None,
>          help="clean: remove siginfo files older than MAX_SIG_AGE
> (defaults to MAX_AGE)")
> @@ -664,21 +664,21 @@ def sstate_clean(target, max_age, max_sig_age,
> verbose, **kwargs):
>      links = [f for f in all_files if f.islink]
>      if links:
>          print(f"NOTE: we have links: {links}")
> -    tgz_files = [f for f in all_files if f.suffix == 'tgz']
> -    siginfo_files = [f for f in all_files if f.suffix ==
> 'tgz.siginfo']
> -    del_tgz_files = [f for f in tgz_files if f.age >=
> max_age_seconds]
> -    del_tgz_hashes = [f.hash for f in del_tgz_files]
> +    archive_files = [f for f in all_files if f.suffix in ['tgz',
> 'tar.zst']]
> +    siginfo_files = [f for f in all_files if f.suffix in
> ['tgz.siginfo', 'tar.zst.siginfo']]
> +    del_archive_files = [f for f in archive_files if f.age >=
> max_age_seconds]
> +    del_archive_hashes = [f.hash for f in del_archive_files]
>      del_siginfo_files = [f for f in siginfo_files if
> -                         f.age >= max_sig_age_seconds or f.hash in
> del_tgz_hashes]
> -    print(f"INFO: found {len(tgz_files)} tgz files,
> {len(del_tgz_files)} of which are older than {max_age}")
> +                         f.age >= max_sig_age_seconds or f.hash in
> del_archive_hashes]
> +    print(f"INFO: found {len(archive_files)} archive files,
> {len(del_archive_files)} of which are older than {max_age}")
>      print(f"INFO: found {len(siginfo_files)} siginfo files,
> {len(del_siginfo_files)} of which "
> -          f"correspond to old tgz files or are older than
> {max_sig_age}")
> +          f"correspond to old archive files or are older than
> {max_sig_age}")
>  
> -    for f in del_tgz_files + del_siginfo_files:
> +    for f in del_archive_files + del_siginfo_files:
>          if verbose:
>              print(f"[DELETE] {f.path}")
>          target.delete(f.path)
> -    freed_gb = sum([x.size for x in del_tgz_files +
> del_siginfo_files]) / 1024.0 / 1024.0 / 1024.0
> +    freed_gb = sum([x.size for x in del_archive_files +
> del_siginfo_files]) / 1024.0 / 1024.0 / 1024.0
>      print(f"INFO: freed {freed_gb:.02f} GB")
>      return 0
>  
> @@ -716,7 +716,7 @@ def sstate_info(target, verbose, **kwargs):
>      for k, entries in recipes.items():
>          print(f"Cache entries for {k}:")
>          for pn in entries:
> -            artifacts = [f for f in all_files if f.pn == pn and
> f.task == key_task[k] and f.suffix == 'tgz']
> +            artifacts = [f for f in all_files if f.pn == pn and
> f.task == key_task[k] and f.suffix in ['tgz', 'tar.zst']]
>              print(f"  - {pn}: {len(artifacts)} entries")
>      print("Other cache entries:")
>      for pn in others:
> -- 
> 2.30.2
>
Uladzimir Bely Feb. 16, 2023, 4:32 a.m. UTC | #4
In the email from Friday, 10 February 2023 18:34:34 +03 user Adriaan Schmidt 
wrote:
> With bitbake 2.0, sstate artifacts have changed from tgz to tar.zst.
> Our isar-sstate script needs to scan for those as well. The implementation
> is backwards-compatible.
> 
> Signed-off-by: Adriaan Schmidt <adriaan.schmidt@siemens.com>
> ---
>  scripts/isar-sstate | 24 ++++++++++++------------
>  1 file changed, 12 insertions(+), 12 deletions(-)
> 
> diff --git a/scripts/isar-sstate b/scripts/isar-sstate
> index 53d0541f..c14c2843 100755
> --- a/scripts/isar-sstate
> +++ b/scripts/isar-sstate
> @@ -40,7 +40,7 @@ followed by one of `w`, `d`, `h`, `m`, or `s` (for weeks,
> days, hours, minutes, seconds, respectively).
> 
>  `--max-age` specifies up to which age artifacts should be kept in the
> cache. -Anything older will be removed. Note that this only applies to the
> `.tgz` files +Anything older will be removed. Note that this only applies
> to the archive files containing the actual cached items, not the `.siginfo`
> files containing the cache metadata (signatures and hashes).
>  To permit analysis of caching details using the `analyze` command, the
> siginfo @@ -576,7 +576,7 @@ def arguments():
>          '-v', '--verbose', default=False, action='store_true')
>      parser.add_argument(
>          '--max-age', type=str, default='1d',
> -        help="clean: remove tgz files older than MAX_AGE (a number followed
> by w|d|h|m|s)") +        help="clean: remove archive files older than
> MAX_AGE (a number followed by w|d|h|m|s)") parser.add_argument(
>          '--max-sig-age', type=str, default=None,
>          help="clean: remove siginfo files older than MAX_SIG_AGE (defaults
> to MAX_AGE)") @@ -664,21 +664,21 @@ def sstate_clean(target, max_age,
> max_sig_age, verbose, **kwargs): links = [f for f in all_files if f.islink]
>      if links:
>          print(f"NOTE: we have links: {links}")
> -    tgz_files = [f for f in all_files if f.suffix == 'tgz']
> -    siginfo_files = [f for f in all_files if f.suffix == 'tgz.siginfo']
> -    del_tgz_files = [f for f in tgz_files if f.age >= max_age_seconds]
> -    del_tgz_hashes = [f.hash for f in del_tgz_files]
> +    archive_files = [f for f in all_files if f.suffix in ['tgz',
> 'tar.zst']] +    siginfo_files = [f for f in all_files if f.suffix in
> ['tgz.siginfo', 'tar.zst.siginfo']] +    del_archive_files = [f for f in
> archive_files if f.age >= max_age_seconds] +    del_archive_hashes =
> [f.hash for f in del_archive_files]
>      del_siginfo_files = [f for f in siginfo_files if
> -                         f.age >= max_sig_age_seconds or f.hash in
> del_tgz_hashes] -    print(f"INFO: found {len(tgz_files)} tgz files,
> {len(del_tgz_files)} of which are older than {max_age}") +                 
>        f.age >= max_sig_age_seconds or f.hash in del_archive_hashes] +   
> print(f"INFO: found {len(archive_files)} archive files,
> {len(del_archive_files)} of which are older than {max_age}") print(f"INFO:
> found {len(siginfo_files)} siginfo files, {len(del_siginfo_files)} of which
> " -          f"correspond to old tgz files or are older than
> {max_sig_age}") +          f"correspond to old archive files or are older
> than {max_sig_age}")
> 
> -    for f in del_tgz_files + del_siginfo_files:
> +    for f in del_archive_files + del_siginfo_files:
>          if verbose:
>              print(f"[DELETE] {f.path}")
>          target.delete(f.path)
> -    freed_gb = sum([x.size for x in del_tgz_files + del_siginfo_files]) /
> 1024.0 / 1024.0 / 1024.0 +    freed_gb = sum([x.size for x in
> del_archive_files + del_siginfo_files]) / 1024.0 / 1024.0 / 1024.0
> print(f"INFO: freed {freed_gb:.02f} GB")
>      return 0
> 
> @@ -716,7 +716,7 @@ def sstate_info(target, verbose, **kwargs):
>      for k, entries in recipes.items():
>          print(f"Cache entries for {k}:")
>          for pn in entries:
> -            artifacts = [f for f in all_files if f.pn == pn and f.task ==
> key_task[k] and f.suffix == 'tgz'] +            artifacts = [f for f in
> all_files if f.pn == pn and f.task == key_task[k] and f.suffix in ['tgz',
> 'tar.zst']] print(f"  - {pn}: {len(artifacts)} entries")
>      print("Other cache entries:")
>      for pn in others:

Applied to next, thanks.

Patch

diff --git a/scripts/isar-sstate b/scripts/isar-sstate
index 53d0541f..c14c2843 100755
--- a/scripts/isar-sstate
+++ b/scripts/isar-sstate
@@ -40,7 +40,7 @@  followed by one of `w`, `d`, `h`, `m`, or `s` (for weeks, days, hours, minutes,
 seconds, respectively).
 
 `--max-age` specifies up to which age artifacts should be kept in the cache.
-Anything older will be removed. Note that this only applies to the `.tgz` files
+Anything older will be removed. Note that this only applies to the archive files
 containing the actual cached items, not the `.siginfo` files containing the
 cache metadata (signatures and hashes).
 To permit analysis of caching details using the `analyze` command, the siginfo
@@ -576,7 +576,7 @@  def arguments():
         '-v', '--verbose', default=False, action='store_true')
     parser.add_argument(
         '--max-age', type=str, default='1d',
-        help="clean: remove tgz files older than MAX_AGE (a number followed by w|d|h|m|s)")
+        help="clean: remove archive files older than MAX_AGE (a number followed by w|d|h|m|s)")
     parser.add_argument(
         '--max-sig-age', type=str, default=None,
         help="clean: remove siginfo files older than MAX_SIG_AGE (defaults to MAX_AGE)")
@@ -664,21 +664,21 @@  def sstate_clean(target, max_age, max_sig_age, verbose, **kwargs):
     links = [f for f in all_files if f.islink]
     if links:
         print(f"NOTE: we have links: {links}")
-    tgz_files = [f for f in all_files if f.suffix == 'tgz']
-    siginfo_files = [f for f in all_files if f.suffix == 'tgz.siginfo']
-    del_tgz_files = [f for f in tgz_files if f.age >= max_age_seconds]
-    del_tgz_hashes = [f.hash for f in del_tgz_files]
+    archive_files = [f for f in all_files if f.suffix in ['tgz', 'tar.zst']]
+    siginfo_files = [f for f in all_files if f.suffix in ['tgz.siginfo', 'tar.zst.siginfo']]
+    del_archive_files = [f for f in archive_files if f.age >= max_age_seconds]
+    del_archive_hashes = [f.hash for f in del_archive_files]
     del_siginfo_files = [f for f in siginfo_files if
-                         f.age >= max_sig_age_seconds or f.hash in del_tgz_hashes]
-    print(f"INFO: found {len(tgz_files)} tgz files, {len(del_tgz_files)} of which are older than {max_age}")
+                         f.age >= max_sig_age_seconds or f.hash in del_archive_hashes]
+    print(f"INFO: found {len(archive_files)} archive files, {len(del_archive_files)} of which are older than {max_age}")
     print(f"INFO: found {len(siginfo_files)} siginfo files, {len(del_siginfo_files)} of which "
-          f"correspond to old tgz files or are older than {max_sig_age}")
+          f"correspond to old archive files or are older than {max_sig_age}")
 
-    for f in del_tgz_files + del_siginfo_files:
+    for f in del_archive_files + del_siginfo_files:
         if verbose:
             print(f"[DELETE] {f.path}")
         target.delete(f.path)
-    freed_gb = sum([x.size for x in del_tgz_files + del_siginfo_files]) / 1024.0 / 1024.0 / 1024.0
+    freed_gb = sum([x.size for x in del_archive_files + del_siginfo_files]) / 1024.0 / 1024.0 / 1024.0
     print(f"INFO: freed {freed_gb:.02f} GB")
     return 0
 
@@ -716,7 +716,7 @@  def sstate_info(target, verbose, **kwargs):
     for k, entries in recipes.items():
         print(f"Cache entries for {k}:")
         for pn in entries:
-            artifacts = [f for f in all_files if f.pn == pn and f.task == key_task[k] and f.suffix == 'tgz']
+            artifacts = [f for f in all_files if f.pn == pn and f.task == key_task[k] and f.suffix in ['tgz', 'tar.zst']]
             print(f"  - {pn}: {len(artifacts)} entries")
     print("Other cache entries:")
     for pn in others: