[4/9] isar-sstate: lint: add support for checking stamps

Message ID 20240402172813.418770-5-chris.larson@siemens.com
State Superseded, archived
Headers show
Series Add more signature cachability tests to the testsuite | expand

Commit Message

kergoth@gmail.com April 2, 2024, 5:28 p.m. UTC
From: Christopher Larson <chris.larson@seimens.com>

Bitbake supports writing signature data directly to the stamps directory
without having to build, so we should add the ability to lint this
signature data as well. This is useful for checking for cachability
issues without having to complete a build.

Signed-off-by: Christopher Larson <chris.larson@siemens.com>
---
 scripts/isar-sstate | 34 +++++++++++++++++++++++-----------
 1 file changed, 23 insertions(+), 11 deletions(-)

Comments

MOESSBAUER, Felix April 3, 2024, 7:02 a.m. UTC | #1
On Tue, 2024-04-02 at 17:28 +0000, kergoth@gmail.com wrote:
> From: Christopher Larson <chris.larson@seimens.com>
> 
> Bitbake supports writing signature data directly to the stamps
> directory
> without having to build, so we should add the ability to lint this
> signature data as well. This is useful for checking for cachability
> issues without having to complete a build.

Hi,

I guess that is enabled with the '--dump-signatures=' parameter. Can we
please add a brief example (one-line) to the linter script for this
case.

> 
> Signed-off-by: Christopher Larson <chris.larson@siemens.com>
> ---
>  scripts/isar-sstate | 34 +++++++++++++++++++++++-----------
>  1 file changed, 23 insertions(+), 11 deletions(-)
> 
> diff --git a/scripts/isar-sstate b/scripts/isar-sstate
> index 9b20cb8e..b77f73eb 100755
> --- a/scripts/isar-sstate
> +++ b/scripts/isar-sstate
> @@ -154,10 +154,12 @@ SstateCacheEntry = namedtuple(
>  #                   
> "${PV}:${PR}:${SSTATE_PKGARCH}:${SSTATE_VERSION}:"
>  
>  # This regex extracts relevant fields:
> -SstateRegex = re.compile(r'sstate:(?P<pn>[^:]*):[^:]*:[^:]*:[^:]*:'
> +SstateRegex =
> re.compile(r'(.*/)?sstate:(?P<pn>[^:]*):[^:]*:[^:]*:[^:]*:'
>                           r'(?P<arch>[^:]*):[^:]*:(?P<hash>[0-9a-
> f]*)_'
>                           r'(?P<task>[^\.]*)\.(?P<suffix>.*)')
> -
> +StampsRegex = re.compile(
> + 

Is this regex hand-crafted or can it be found somewhere in bitbake as
well? Is the format documented in bitbake? I'm a bit worried that this
might change across versions. If so, please add a note that this might
need to be adjusted on bitbake updates.

Felix

>   
> r"(.*/)?(?P<arch>[^/]+)/(?P<pn>[^/]+)/([^/]+)\.do_(?P<task>[^/]+)\.(?
> P<suffix>sigdata)\.(?P<hash>[0-9a-f]{64})"
> +)
>  
>  class SstateTargetBase(object):
>      def __init__(self, path, cached=False):
> @@ -288,12 +290,13 @@ class SstateTargetBase(object):
>  
>  
>  class SstateFileTarget(SstateTargetBase):
> -    def __init__(self, path, **kwargs):
> +    def __init__(self, path, regex=SstateRegex, **kwargs):
>          super().__init__(path, **kwargs)
>          if path.startswith('file://'):
>              path = path[len('file://'):]
>          self.path = path
>          self.basepath = os.path.abspath(path)
> +        self.regex = regex
>  
>      def __repr__(self):
>          return f"file://{self.path}"
> @@ -334,12 +337,13 @@ class SstateFileTarget(SstateTargetBase):
>          for subdir, dirs, files in os.walk(self.basepath):
>              reldir = subdir[(len(self.basepath)+1):]
>              for f in files:
> -                m = SstateRegex.match(f)
> +                relative = os.path.join(reldir, f)
> +                m = self.regex.match(relative)
>                  if m is not None:
>                      islink = os.path.islink(os.path.join(subdir, f))
>                      age = int(now -
> os.path.getmtime(os.path.join(subdir, f)))
>                      all_files.append(SstateCacheEntry(
> -                        path=os.path.join(reldir, f),
> +                        path=relative,
>                          size=os.path.getsize(os.path.join(subdir,
> f)),
>                          islink=islink,
>                          age=age,
> @@ -592,6 +596,9 @@ def arguments():
>      parser.add_argument(
>          '--exit-code', type=int, default=None,
>          help="lint: return this instead of number of found issues")
> +    parser.add_argument(
> +        '--lint-stamps', default=False, action='store_true',
> +        help="lint: assume target is a stamps directory (target must
> be a local path)")
>  
>      args = parser.parse_args()
>      if args.command in 'upload analyze'.split() and args.source is
> None:
> @@ -798,7 +805,7 @@ def sstate_analyze(source, target, **kwargs):
>              print('\n'.join(out))
>  
>  
> -def sstate_lint(target, verbose, sources_dir, build_dir, exit_code,
> pedantic, **kwargs):
> +def sstate_lint(target, verbose, sources_dir, build_dir, exit_code,
> pedantic, lint_stamps, **kwargs):
>      ADDITIONAL_IGNORED_VARNAMES = 'PP'.split()
>      # only list non-cacheable tasks here
>      # note that these still can break caching of other tasks that
> depend on these.
> @@ -809,7 +816,10 @@ def sstate_lint(target, verbose, sources_dir,
> build_dir, exit_code, pedantic, **
>          print(f"WARNING: target {target} does not exist. Nothing to
> analyze.")
>          return 0
>  
> -    cache_sigs = {s.hash: s for s in target.list_all() if
> s.suffix.endswith('.siginfo')}
> +    if lint_stamps:
> +        cache_sigs = {s.hash: s for s in target.list_all()}
> +    else:
> +        cache_sigs = {s.hash: s for s in target.list_all() if
> s.suffix.endswith('.siginfo')}
>  
>      hits_srcdir = 0
>      hits_builddir = 0
> @@ -891,10 +901,12 @@ def main():
>          target = SstateDavTarget(args.target)
>      elif args.target.startswith('s3://'):
>          target = SstateS3Target(args.target)
> -    elif args.target.startswith('file://'):
> -        target = SstateFileTarget(args.target)
> -    else:  # no protocol given, assume file://
> -        target = SstateFileTarget(args.target)
> +    else:  # Either file://, or no protocol given, assume file://
> +        target = SstateFileTarget(args.target, StampsRegex if
> args.lint_stamps else SstateRegex)
> +
> +    if args.lint_stamps and not isinstance(target,
> SstateFileTarget):
> +        print("ERROR: --lint-stamps only works with local file
> targets")
> +        return 1
>  
>      args.target = target
>      return globals()[f'sstate_{args.command}'](**vars(args))
> -- 
> 2.39.2
>
Larson, Chris April 3, 2024, 9:42 p.m. UTC | #2
Will do, thanks. Your feedback has been quite helpful.

-----Original Message-----
From: Moessbauer, Felix (T CED OES-DE) <felix.moessbauer@siemens.com> 
Sent: Wednesday, April 3, 2024 12:03 AM
To: kergoth@gmail.com; isar-users@googlegroups.com
Cc: Larson, Chris (DI CTO FDS CES LX MEL) <chris.larson@siemens.com>
Subject: Re: [PATCH 4/9] isar-sstate: lint: add support for checking stamps

On Tue, 2024-04-02 at 17:28 +0000, kergoth@gmail.com wrote:
> From: Christopher Larson <chris.larson@seimens.com>
> 
> Bitbake supports writing signature data directly to the stamps 
> directory without having to build, so we should add the ability to 
> lint this signature data as well. This is useful for checking for 
> cachability issues without having to complete a build.

Hi,

I guess that is enabled with the '--dump-signatures=' parameter. Can we please add a brief example (one-line) to the linter script for this case.

> 
> Signed-off-by: Christopher Larson <chris.larson@siemens.com>
> ---
>  scripts/isar-sstate | 34 +++++++++++++++++++++++-----------
>  1 file changed, 23 insertions(+), 11 deletions(-)
> 
> diff --git a/scripts/isar-sstate b/scripts/isar-sstate index 
> 9b20cb8e..b77f73eb 100755
> --- a/scripts/isar-sstate
> +++ b/scripts/isar-sstate
> @@ -154,10 +154,12 @@ SstateCacheEntry = namedtuple(
>  #
> "${PV}:${PR}:${SSTATE_PKGARCH}:${SSTATE_VERSION}:"
>  
>  # This regex extracts relevant fields:
> -SstateRegex = re.compile(r'sstate:(?P<pn>[^:]*):[^:]*:[^:]*:[^:]*:'
> +SstateRegex =
> re.compile(r'(.*/)?sstate:(?P<pn>[^:]*):[^:]*:[^:]*:[^:]*:'
>                           r'(?P<arch>[^:]*):[^:]*:(?P<hash>[0-9a-
> f]*)_'
>                           r'(?P<task>[^\.]*)\.(?P<suffix>.*)')
> -
> +StampsRegex = re.compile(
> + 

Is this regex hand-crafted or can it be found somewhere in bitbake as well? Is the format documented in bitbake? I'm a bit worried that this might change across versions. If so, please add a note that this might need to be adjusted on bitbake updates.

Felix

>   
> r"(.*/)?(?P<arch>[^/]+)/(?P<pn>[^/]+)/([^/]+)\.do_(?P<task>[^/]+)\.(?
> P<suffix>sigdata)\.(?P<hash>[0-9a-f]{64})"
> +)
>  
>  class SstateTargetBase(object):
>      def __init__(self, path, cached=False):
> @@ -288,12 +290,13 @@ class SstateTargetBase(object):
>  
>  
>  class SstateFileTarget(SstateTargetBase):
> -    def __init__(self, path, **kwargs):
> +    def __init__(self, path, regex=SstateRegex, **kwargs):
>          super().__init__(path, **kwargs)
>          if path.startswith('file://'):
>              path = path[len('file://'):]
>          self.path = path
>          self.basepath = os.path.abspath(path)
> +        self.regex = regex
>  
>      def __repr__(self):
>          return f"file://{self.path}"
> @@ -334,12 +337,13 @@ class SstateFileTarget(SstateTargetBase):
>          for subdir, dirs, files in os.walk(self.basepath):
>              reldir = subdir[(len(self.basepath)+1):]
>              for f in files:
> -                m = SstateRegex.match(f)
> +                relative = os.path.join(reldir, f)
> +                m = self.regex.match(relative)
>                  if m is not None:
>                      islink = os.path.islink(os.path.join(subdir, f))
>                      age = int(now -
> os.path.getmtime(os.path.join(subdir, f)))
>                      all_files.append(SstateCacheEntry(
> -                        path=os.path.join(reldir, f),
> +                        path=relative,
>                          size=os.path.getsize(os.path.join(subdir,
> f)),
>                          islink=islink,
>                          age=age,
> @@ -592,6 +596,9 @@ def arguments():
>      parser.add_argument(
>          '--exit-code', type=int, default=None,
>          help="lint: return this instead of number of found issues")
> +    parser.add_argument(
> +        '--lint-stamps', default=False, action='store_true',
> +        help="lint: assume target is a stamps directory (target must
> be a local path)")
>  
>      args = parser.parse_args()
>      if args.command in 'upload analyze'.split() and args.source is
> None:
> @@ -798,7 +805,7 @@ def sstate_analyze(source, target, **kwargs):
>              print('\n'.join(out))
>  
>  
> -def sstate_lint(target, verbose, sources_dir, build_dir, exit_code, 
> pedantic, **kwargs):
> +def sstate_lint(target, verbose, sources_dir, build_dir, exit_code,
> pedantic, lint_stamps, **kwargs):
>      ADDITIONAL_IGNORED_VARNAMES = 'PP'.split()
>      # only list non-cacheable tasks here
>      # note that these still can break caching of other tasks that 
> depend on these.
> @@ -809,7 +816,10 @@ def sstate_lint(target, verbose, sources_dir, 
> build_dir, exit_code, pedantic, **
>          print(f"WARNING: target {target} does not exist. Nothing to
> analyze.")
>          return 0
>  
> -    cache_sigs = {s.hash: s for s in target.list_all() if 
> s.suffix.endswith('.siginfo')}
> +    if lint_stamps:
> +        cache_sigs = {s.hash: s for s in target.list_all()}
> +    else:
> +        cache_sigs = {s.hash: s for s in target.list_all() if
> s.suffix.endswith('.siginfo')}
>  
>      hits_srcdir = 0
>      hits_builddir = 0
> @@ -891,10 +901,12 @@ def main():
>          target = SstateDavTarget(args.target)
>      elif args.target.startswith('s3://'):
>          target = SstateS3Target(args.target)
> -    elif args.target.startswith('file://'):
> -        target = SstateFileTarget(args.target)
> -    else:  # no protocol given, assume file://
> -        target = SstateFileTarget(args.target)
> +    else:  # Either file://, or no protocol given, assume file://
> +        target = SstateFileTarget(args.target, StampsRegex if
> args.lint_stamps else SstateRegex)
> +
> +    if args.lint_stamps and not isinstance(target,
> SstateFileTarget):
> +        print("ERROR: --lint-stamps only works with local file
> targets")
> +        return 1
>  
>      args.target = target
>      return globals()[f'sstate_{args.command}'](**vars(args))
> --
> 2.39.2
>

Patch

diff --git a/scripts/isar-sstate b/scripts/isar-sstate
index 9b20cb8e..b77f73eb 100755
--- a/scripts/isar-sstate
+++ b/scripts/isar-sstate
@@ -154,10 +154,12 @@  SstateCacheEntry = namedtuple(
 #                    "${PV}:${PR}:${SSTATE_PKGARCH}:${SSTATE_VERSION}:"
 
 # This regex extracts relevant fields:
-SstateRegex = re.compile(r'sstate:(?P<pn>[^:]*):[^:]*:[^:]*:[^:]*:'
+SstateRegex = re.compile(r'(.*/)?sstate:(?P<pn>[^:]*):[^:]*:[^:]*:[^:]*:'
                          r'(?P<arch>[^:]*):[^:]*:(?P<hash>[0-9a-f]*)_'
                          r'(?P<task>[^\.]*)\.(?P<suffix>.*)')
-
+StampsRegex = re.compile(
+    r"(.*/)?(?P<arch>[^/]+)/(?P<pn>[^/]+)/([^/]+)\.do_(?P<task>[^/]+)\.(?P<suffix>sigdata)\.(?P<hash>[0-9a-f]{64})"
+)
 
 class SstateTargetBase(object):
     def __init__(self, path, cached=False):
@@ -288,12 +290,13 @@  class SstateTargetBase(object):
 
 
 class SstateFileTarget(SstateTargetBase):
-    def __init__(self, path, **kwargs):
+    def __init__(self, path, regex=SstateRegex, **kwargs):
         super().__init__(path, **kwargs)
         if path.startswith('file://'):
             path = path[len('file://'):]
         self.path = path
         self.basepath = os.path.abspath(path)
+        self.regex = regex
 
     def __repr__(self):
         return f"file://{self.path}"
@@ -334,12 +337,13 @@  class SstateFileTarget(SstateTargetBase):
         for subdir, dirs, files in os.walk(self.basepath):
             reldir = subdir[(len(self.basepath)+1):]
             for f in files:
-                m = SstateRegex.match(f)
+                relative = os.path.join(reldir, f)
+                m = self.regex.match(relative)
                 if m is not None:
                     islink = os.path.islink(os.path.join(subdir, f))
                     age = int(now - os.path.getmtime(os.path.join(subdir, f)))
                     all_files.append(SstateCacheEntry(
-                        path=os.path.join(reldir, f),
+                        path=relative,
                         size=os.path.getsize(os.path.join(subdir, f)),
                         islink=islink,
                         age=age,
@@ -592,6 +596,9 @@  def arguments():
     parser.add_argument(
         '--exit-code', type=int, default=None,
         help="lint: return this instead of number of found issues")
+    parser.add_argument(
+        '--lint-stamps', default=False, action='store_true',
+        help="lint: assume target is a stamps directory (target must be a local path)")
 
     args = parser.parse_args()
     if args.command in 'upload analyze'.split() and args.source is None:
@@ -798,7 +805,7 @@  def sstate_analyze(source, target, **kwargs):
             print('\n'.join(out))
 
 
-def sstate_lint(target, verbose, sources_dir, build_dir, exit_code, pedantic, **kwargs):
+def sstate_lint(target, verbose, sources_dir, build_dir, exit_code, pedantic, lint_stamps, **kwargs):
     ADDITIONAL_IGNORED_VARNAMES = 'PP'.split()
     # only list non-cacheable tasks here
     # note that these still can break caching of other tasks that depend on these.
@@ -809,7 +816,10 @@  def sstate_lint(target, verbose, sources_dir, build_dir, exit_code, pedantic, **
         print(f"WARNING: target {target} does not exist. Nothing to analyze.")
         return 0
 
-    cache_sigs = {s.hash: s for s in target.list_all() if s.suffix.endswith('.siginfo')}
+    if lint_stamps:
+        cache_sigs = {s.hash: s for s in target.list_all()}
+    else:
+        cache_sigs = {s.hash: s for s in target.list_all() if s.suffix.endswith('.siginfo')}
 
     hits_srcdir = 0
     hits_builddir = 0
@@ -891,10 +901,12 @@  def main():
         target = SstateDavTarget(args.target)
     elif args.target.startswith('s3://'):
         target = SstateS3Target(args.target)
-    elif args.target.startswith('file://'):
-        target = SstateFileTarget(args.target)
-    else:  # no protocol given, assume file://
-        target = SstateFileTarget(args.target)
+    else:  # Either file://, or no protocol given, assume file://
+        target = SstateFileTarget(args.target, StampsRegex if args.lint_stamps else SstateRegex)
+
+    if args.lint_stamps and not isinstance(target, SstateFileTarget):
+        print("ERROR: --lint-stamps only works with local file targets")
+        return 1
 
     args.target = target
     return globals()[f'sstate_{args.command}'](**vars(args))