[RFC,1/1] meta: add CycloneDX/SPDX SBOM generation

Message ID 20250220095944.114203-2-felix.moessbauer@siemens.com
State New
Headers show
Series SBOM Generation for isar | expand

Commit Message

Felix Moessbauer Feb. 20, 2025, 9:59 a.m. UTC
From: Christoph Steiger <christoph.steiger@siemens.com>

Add a new class that generates a software bill of materials (SBOM).
The two standard SBOM formats, CycloneDX and SPDX, are supported.
SBOM generation is enabled by default for all images.

Both formats support the minimal use case of binary package information
and package dependencies. Unfortunately, there is no proper way to express
the relationship between Debian source packages and their corresponding
binary packages in the CDX format, so it is left out there.

The information included in the SBOM is parsed from the dpkg status
file found in the created image.

Signed-off-by: Christoph Steiger <christoph.steiger@siemens.com>
---
 meta/classes/create-sbom.bbclass |  49 ++++
 meta/classes/image.bbclass       |   2 +
 meta/lib/sbom.py                 | 446 +++++++++++++++++++++++++++++++
 meta/lib/sbom_cdx_types.py       |  82 ++++++
 meta/lib/sbom_spdx_types.py      |  95 +++++++
 5 files changed, 674 insertions(+)
 create mode 100644 meta/classes/create-sbom.bbclass
 create mode 100644 meta/lib/sbom.py
 create mode 100644 meta/lib/sbom_cdx_types.py
 create mode 100644 meta/lib/sbom_spdx_types.py

Comments

Gernot Hillier Feb. 20, 2025, 6:58 p.m. UTC | #1
On 20.02.25 10:59, Felix Moessbauer wrote:
> +    if "cyclonedx" in sbom_type:
> +        sbom.generate(d, packages, sbom.SBOMType.CycloneDX, d.getVar("SBOM_DEPLOY_BASE") + ".cyclonedx.json")

According to https://cyclonedx.org/specification/overview/, "Recognized 
file patterns", file extension should be .cdx.json.

--
Gernot

Patch

diff --git a/meta/classes/create-sbom.bbclass b/meta/classes/create-sbom.bbclass
new file mode 100644
index 00000000..8c647699
--- /dev/null
+++ b/meta/classes/create-sbom.bbclass
@@ -0,0 +1,49 @@ 
+# This software is a part of ISAR.
+# Copyright (C) 2025 Siemens AG
+#
+# SPDX-License-Identifier: MIT
+
+# sbom type to generate, accepted are "cyclonedx" and "spdx"
+# (space separated, both are generated by default)
+SBOM_TYPE ?= "cyclonedx spdx"
+
+# general user variables
+# supplier, name, version and summary describe the distribution entry that
+# is added to the SBOM next to the installed packages
+SBOM_DISTRO_SUPPLIER ?= "ISAR"
+SBOM_DISTRO_NAME ?= "ISAR-Debian-GNU-Linux"
+SBOM_DISTRO_VERSION ?= "1.0.0"
+SBOM_DISTRO_SUMMARY ?= "Linux distribution built with ISAR"
+# if left empty, do_create_sbom fills it via generate_document_uuid()
+SBOM_DOCUMENT_UUID ?= ""
+
+# SPDX specific user variables
+# prefix of the SPDX documentNamespace
+SBOM_SPDX_NAMESPACE_PREFIX ?= "https://spdx.org/spdxdocs"
+
+# path of the generated SBOM files without the format specific extension
+SBOM_DEPLOY_BASE = "${DEPLOY_DIR_IMAGE}/${IMAGE_FULLNAME}"
+
+# version of the SBOM generator, recorded as tool version in the SBOMs
+SBOM_GEN_VERSION = "0.1.0"
+
+# adapted from the isar-cip-core image_uuid.bbclass
+def generate_document_uuid(d):
+    """Return the SBOM document UUID as a string.
+
+    The UUID is derived from the first 32 hex digits of BB_TASKHASH so that
+    it is reproducible for identical task inputs. If no task hash is
+    available a random UUID is returned and a warning is printed.
+    """
+    import uuid
+
+    base_hash = d.getVar("BB_TASKHASH")
+    # an empty hash can not be converted to a UUID either
+    if not base_hash:
+        bb.warn("no BB_TASKHASH available, SBOM UUID is not reproducible")
+        # always return a string, like the reproducible path below
+        return str(uuid.uuid4())
+    # version=4 overrides the version/variant bits to form a valid UUIDv4
+    return str(uuid.UUID(base_hash[:32], version=4))
+
+python do_create_sbom() {
+    import sbom
+
+    # the installed packages are taken from the dpkg status file of the
+    # image root file system
+    dpkg_status = d.getVar("IMAGE_ROOTFS") + "/var/lib/dpkg/status"
+    packages = sbom.Package.parse_status_file(dpkg_status)
+
+    # both SBOM formats share the same document UUID
+    if not d.getVar("SBOM_DOCUMENT_UUID"):
+        d.setVar("SBOM_DOCUMENT_UUID", str(generate_document_uuid(d)))
+
+    # compare whole words instead of substrings so that only the documented
+    # values select a format
+    sbom_types = (d.getVar("SBOM_TYPE") or "").split()
+    for unknown in sorted(set(sbom_types) - {"cyclonedx", "spdx"}):
+        bb.warn("ignoring unknown SBOM_TYPE '{}'".format(unknown))
+
+    deploy_base = d.getVar("SBOM_DEPLOY_BASE")
+    if "cyclonedx" in sbom_types:
+        # ".cdx.json" is the file pattern recognized by the CycloneDX
+        # specification
+        sbom.generate(d, packages, sbom.SBOMType.CycloneDX, deploy_base + ".cdx.json")
+    if "spdx" in sbom_types:
+        sbom.generate(d, packages, sbom.SBOMType.SPDX, deploy_base + ".spdx.json")
+}
+
+addtask do_create_sbom after do_rootfs before do_build
diff --git a/meta/classes/image.bbclass b/meta/classes/image.bbclass
index 56eca202..e9da6a61 100644
--- a/meta/classes/image.bbclass
+++ b/meta/classes/image.bbclass
@@ -81,6 +81,8 @@  inherit image-postproc-extension
 inherit image-locales-extension
 inherit image-account-extension
 
+inherit create-sbom
+
 # Extra space for rootfs in MB
 ROOTFS_EXTRA ?= "64"
 
diff --git a/meta/lib/sbom.py b/meta/lib/sbom.py
new file mode 100644
index 00000000..d7c79e43
--- /dev/null
+++ b/meta/lib/sbom.py
@@ -0,0 +1,446 @@ 
+# This software is part of ISAR.
+# Copyright (C) 2025 Siemens AG
+#
+# SPDX-License-Identifier: MIT
+
+from dataclasses import dataclass
+from datetime import datetime
+from enum import Enum
+from typing import Dict, List, Type
+import json
+import re
+from uuid import uuid4
+
+import sbom_cdx_types as cdx
+import sbom_spdx_types as spdx
+
+
+class SBOMType(Enum):
+    """SBOM formats that can be generated."""
+
+    # NOTE(review): the trailing commas make the member values one-element
+    # tuples, not plain integers; this file only compares the members
+    # themselves and never reads the values.
+    CycloneDX = (0,)
+    SPDX = (1,)
+
+
+@dataclass
+class SourcePackage:
+    """Debian source package as referenced by the 'Source' field."""
+
+    name: str
+    # None if the 'Source' field carries no explicit version
+    version: str | None
+
+    def purl(self) -> str:
+        """Return the PURL of the package."""
+        return "pkg:deb/debian/{}@{}?arch=source".format(self.name, self.version)
+
+    def bom_ref(self, sbom_type: SBOMType) -> str:
+        """Return a unique BOM reference.
+
+        The reference is the format specific prefix followed by the package
+        name and a '-src' suffix, which keeps it apart from the reference of
+        a binary package with the same name.
+
+        Raises ValueError for an unknown SBOM type.
+        """
+        if sbom_type == SBOMType.CycloneDX:
+            return cdx.CDX_REF_PREFIX + "{}-src".format(self.name)
+        elif sbom_type == SBOMType.SPDX:
+            return spdx.SPDX_REF_PREFIX + "{}-src".format(self.name)
+        raise ValueError("unsupported SBOM type: {}".format(sbom_type))
+
+    @staticmethod
+    def parse(s: str) -> "SourcePackage":
+        """Parse the value of a 'Source' field of the dpkg status file.
+
+        The value is either 'name' or 'name (version)'. The version is None
+        if it is not given.
+        """
+        name, _, rest = s.strip().partition(" ")
+        # strip surrounding whitespace first, otherwise the parentheses are
+        # not at the ends of the string and would be kept
+        version = rest.strip().strip("()") or None
+
+        return SourcePackage(name=name, version=version)
+
+
+@dataclass
+class Dependency:
+    """Single entry of a 'Depends' field: package name and version constraint."""
+
+    name: str
+    # version constraint without parentheses, e.g. ">= 1.2"; None if absent
+    version: str | None
+
+    def bom_ref(self, sbom_type: SBOMType) -> str:
+        """Return a unique BOM reference.
+
+        The reference is built like the one of the binary package with the
+        same name, so that both can be matched.
+
+        Raises ValueError for an unknown SBOM type.
+        """
+        if sbom_type == SBOMType.CycloneDX:
+            return cdx.CDX_REF_PREFIX + "{}".format(self.name)
+        elif sbom_type == SBOMType.SPDX:
+            return spdx.SPDX_REF_PREFIX + "{}".format(self.name)
+        raise ValueError("unsupported SBOM type: {}".format(sbom_type))
+
+    @staticmethod
+    def parse_multiple(s: str) -> List["Dependency"]:
+        """Parse a 'Depends' line in the dpkg status file.
+
+        Entries are separated by ',' and alternatives by '|'. All
+        alternatives are returned as individual dependencies. Empty entries
+        are skipped.
+        """
+        dependencies = []
+        for group in s.split(","):
+            for alternative in group.split("|"):
+                name, _, constraint = alternative.partition("(")
+                # drop an architecture qualifier like ':any', package names
+                # in the status file do not carry it
+                name = name.strip().partition(":")[0]
+                if not name:
+                    continue
+                # remove the closing parenthesis even if whitespace follows
+                version = constraint.strip().rstrip(")").strip() or None
+                dependencies.append(Dependency(name=name, version=version))
+
+        return dependencies
+
+
+@dataclass
+class Package:
+    """Incomplete representation of a debian package.
+
+    All fields except the name are None if the corresponding field is
+    missing in the dpkg status file.
+    """
+
+    name: str
+    section: str
+    maintainer: str
+    architecture: str
+    source: SourcePackage
+    version: str
+    depends: List[Dependency]
+    description: str
+    homepage: str
+
+    def purl(self) -> str:
+        """Return the PURL of the package."""
+        purl = "pkg:deb/debian/{}@{}".format(self.name, self.version)
+        if self.architecture:
+            purl = purl + "?arch={}".format(self.architecture)
+        return purl
+
+    def bom_ref(self, sbom_type: SBOMType) -> str:
+        """Return a unique BOM reference.
+
+        NOTE(review): the package name is used as is. SPDX 2.3 restricts
+        SPDXIDs to letters, digits, '.' and '-', so names like 'libstdc++6'
+        probably need to be sanitized - confirm against the spec.
+
+        Raises ValueError for an unknown SBOM type.
+        """
+        if sbom_type == SBOMType.CycloneDX:
+            return cdx.CDX_REF_PREFIX + self.name
+        elif sbom_type == SBOMType.SPDX:
+            return spdx.SPDX_REF_PREFIX + self.name
+        raise ValueError("unsupported SBOM type: {}".format(sbom_type))
+
+    @staticmethod
+    def _from_fields(fields: Dict) -> "Package":
+        """Create a package from the collected fields of one stanza."""
+        source = fields.get("source")
+        # fixup source version, if not specified it is the same
+        # as the package version
+        if source and not source.version:
+            source.version = fields.get("version")
+        return Package(
+            name=fields.get("name"),
+            section=fields.get("section"),
+            maintainer=fields.get("maintainer"),
+            architecture=fields.get("architecture"),
+            source=source,
+            version=fields.get("version"),
+            depends=fields.get("depends"),
+            description=fields.get("description"),
+            homepage=fields.get("homepage"),
+        )
+
+    @staticmethod
+    def parse_status_file(status_file: str) -> List["Package"]:
+        """Parse a dpkg status file.
+
+        Stanzas are separated by empty lines. Stanzas without a 'Package'
+        field are skipped. The last stanza is also returned if the file does
+        not end with an empty line.
+
+        NOTE(review): the 'Status' field is not evaluated, so entries that
+        are not fully installed would be listed as well - confirm whether
+        this can happen for image root file systems.
+        """
+        # fields with a plain string value and their attribute names
+        simple_fields = {
+            "Package": "name",
+            "Section": "section",
+            "Maintainer": "maintainer",
+            "Architecture": "architecture",
+            "Version": "version",
+            "Description": "description",
+            "Homepage": "homepage",
+        }
+
+        packages = []
+        fields = {}
+        with open(status_file, "r", encoding="utf-8") as f:
+            for line in f:
+                if not line.strip():
+                    # empty line means new package, so finish the current one
+                    if "name" in fields:
+                        packages.append(Package._from_fields(fields))
+                    fields = {}
+                    continue
+                if line[0] in " \t":
+                    # this is a continuation line (e.g. of the description),
+                    # we ignore it
+                    continue
+                # only split at the first colon, values may contain colons
+                key, _, value = line.partition(":")
+                value = value.strip()
+                if key == "Source":
+                    fields["source"] = SourcePackage.parse(value)
+                elif key == "Depends":
+                    fields["depends"] = Dependency.parse_multiple(value)
+                elif key in simple_fields:
+                    fields[simple_fields[key]] = value
+
+        # the file might not end with an empty line
+        if "name" in fields:
+            packages.append(Package._from_fields(fields))
+
+        return packages
+
+
+def cyclonedx_bom(d, packages: List[Package]) -> cdx.CDXBOM:
+    """Return a valid CycloneDX SBOM.
+
+    The BOM contains one component per package, the dependencies between
+    the packages and a distribution component that depends on all packages.
+    The distribution data, document UUID and timestamp are read from the
+    datastore d (SBOM_* variables and SOURCE_DATE_EPOCH).
+    """
+    from datetime import timezone
+
+    data = []
+    dependencies = []
+
+    # splits "Name <email>" into name and optional email
+    pattern = re.compile(r"(?P<supplier_name>^[^<]*)(\<(?P<supplier_email>.*)\>)?")
+    for package in packages:
+        # the maintainer field might be missing
+        supplier = None
+        if package.maintainer:
+            match = pattern.match(package.maintainer)
+            # the name is followed by a space if an email is given
+            supplier = cdx.CDXSupplier(name=match["supplier_name"].strip() or None)
+            supplier_email = match["supplier_email"]
+            if supplier_email:
+                supplier.contact = [cdx.CDXSupplierContact(email=supplier_email)]
+        entry = cdx.CDXComponent(
+            type=cdx.CDX_COMPONENT_TYPE_LIBRARY,
+            bom_ref=package.bom_ref(SBOMType.CycloneDX),
+            supplier=supplier,
+            name=package.name,
+            version=package.version,
+            description=package.description,
+            purl=package.purl(),
+        )
+        if package.homepage:
+            entry.externalReferences = [
+                cdx.CDXExternalReference(
+                    url=package.homepage,
+                    type=cdx.CDX_PACKAGE_EXTREF_TYPE_WEBSITE,
+                    comment="homepage",
+                ),
+            ]
+        data.append(entry)
+
+    distro_bom_ref = cdx.CDX_REF_PREFIX + d.getVar("SBOM_DISTRO_NAME")
+    distro_dependencies = []
+    # after we have found all packages we can start to resolve dependencies
+    package_names = {package.name for package in packages}
+    for package in packages:
+        distro_dependencies.append(package.bom_ref(SBOMType.CycloneDX))
+        if package.depends:
+            deps = []
+            for dep in package.depends:
+                dep_bom_ref = dep.bom_ref(SBOMType.CycloneDX)
+                # it is possible to specify the same package multiple times,
+                # but in different versions. Dependencies that are not
+                # installed (e.g. unused alternatives) are skipped.
+                if dep.name in package_names and dep_bom_ref not in deps:
+                    deps.append(dep_bom_ref)
+            dependencies.append(
+                cdx.CDXDependency(
+                    ref=package.bom_ref(SBOMType.CycloneDX),
+                    dependsOn=deps,
+                )
+            )
+    # the distribution depends on all packages
+    dependencies.append(
+        cdx.CDXDependency(
+            ref=distro_bom_ref,
+            dependsOn=distro_dependencies,
+        )
+    )
+
+    doc_uuid = d.getVar("SBOM_DOCUMENT_UUID")
+    distro_component = cdx.CDXComponent(
+        type=cdx.CDX_COMPONENT_TYPE_OS,
+        bom_ref=distro_bom_ref,
+        supplier=cdx.CDXSupplier(name=d.getVar("SBOM_DISTRO_SUPPLIER")),
+        name=d.getVar("SBOM_DISTRO_NAME"),
+        version=d.getVar("SBOM_DISTRO_VERSION"),
+        description=d.getVar("SBOM_DISTRO_SUMMARY"),
+    )
+
+    # the timestamp is formatted with a 'Z' suffix, so it has to be UTC and
+    # must not depend on the time zone of the build host
+    timestamp = datetime.fromtimestamp(
+        int(d.getVar("SOURCE_DATE_EPOCH")), tz=timezone.utc
+    )
+    bom = cdx.CDXBOM(
+        bomFormat=cdx.CDX_BOM_FORMAT,
+        specVersion=cdx.CDX_SPEC_VERSION,
+        serialNumber="urn:uuid:{}".format(doc_uuid if doc_uuid else uuid4()),
+        version=1,
+        metadata=cdx.CDXBOMMetadata(
+            timestamp=timestamp.strftime("%Y-%m-%dT%H:%M:%SZ"),
+            component=distro_component,
+            tools=cdx.CDXBOMMetadataTool(
+                components=[
+                    cdx.CDXComponent(
+                        type=cdx.CDX_COMPONENT_TYPE_APPLICATION,
+                        name="ISAR SBOM Generator",
+                        version=d.getVar("SBOM_GEN_VERSION"),
+                    )
+                ],
+            ),
+        ),
+        components=data,
+        dependencies=dependencies,
+    )
+    return bom
+
+
+def spdx_bom(d, packages: List[Package]) -> spdx.SPDXBOM:
+    """Return a valid SPDX SBOM.
+
+    The BOM contains an entry for the distribution, one entry per binary
+    package, one entry per source package and the relationships between
+    them. The distribution data, document UUID and timestamp are read from
+    the datastore d (SBOM_* variables and SOURCE_DATE_EPOCH).
+    """
+    from datetime import timezone
+
+    data = []
+    # create a "fake"  entry for the distribution
+    distro_ref = spdx.SPDX_REF_PREFIX + d.getVar("SBOM_DISTRO_NAME")
+    distro_package = spdx.SPDXPackage(
+        SPDXID=distro_ref,
+        name=d.getVar("SBOM_DISTRO_NAME"),
+        versionInfo=d.getVar("SBOM_DISTRO_VERSION"),
+        primaryPackagePurpose=spdx.SPDX_PACKAGE_PURPOSE_OS,
+        supplier="Organization: {}".format(d.getVar("SBOM_DISTRO_SUPPLIER")),
+        downloadLocation=spdx.SPDX_NOASSERTION,
+        filesAnalyzed=False,
+        licenseConcluded=spdx.SPDX_NOASSERTION,
+        licenseDeclared=spdx.SPDX_NOASSERTION,
+        copyrightText=spdx.SPDX_NOASSERTION,
+        summary=d.getVar("SBOM_DISTRO_SUMMARY"),
+    )
+
+    data.append(distro_package)
+
+    # references of the source packages that are already part of data
+    known_sources = set()
+    # splits "Name <email>" into name and optional email
+    pattern = re.compile(r"(?P<supplier_name>^[^<]*)(\<(?P<supplier_email>.*)\>)?")
+    for package in packages:
+        # the maintainer field might be missing
+        supplier = spdx.SPDX_NOASSERTION
+        if package.maintainer:
+            match = pattern.match(package.maintainer)
+            supplier_name = match["supplier_name"].strip()
+            supplier_email = match["supplier_email"]
+            # guess from the name if the maintainer is an organization
+            lowered = supplier_name.lower()
+            if any(cue in lowered for cue in spdx.SPDX_SUPPLIER_ORG_CUE):
+                supplier_kind = "Organization"
+            else:
+                supplier_kind = "Person"
+            contact = supplier_name
+            if supplier_email:
+                contact = "{} ({})".format(supplier_name, supplier_email).strip()
+            supplier = "{}: {}".format(supplier_kind, contact)
+
+        entry = spdx.SPDXPackage(
+            SPDXID=package.bom_ref(SBOMType.SPDX),
+            name=package.name,
+            versionInfo=package.version,
+            primaryPackagePurpose=spdx.SPDX_PACKAGE_PURPOSE_LIBRARY,
+            supplier=supplier,
+            downloadLocation=spdx.SPDX_NOASSERTION,
+            filesAnalyzed=False,
+            # TODO: it should be possible to conclude license/copyright
+            # information, we could look e.g. in /usr/share/doc/*/copyright
+            licenseConcluded=spdx.SPDX_NOASSERTION,
+            licenseDeclared=spdx.SPDX_NOASSERTION,
+            copyrightText=spdx.SPDX_NOASSERTION,
+            summary=package.description,
+            externalRefs=[
+                spdx.SPDXExternalRef(
+                    referenceCategory=spdx.SPDX_REFERENCE_CATEGORY_PKG_MANAGER,
+                    referenceType=spdx.SPDX_REFERENCE_TYPE_PURL,
+                    referenceLocator=package.purl(),
+                )
+            ],
+        )
+        if package.homepage:
+            entry.homepage = package.homepage
+        data.append(entry)
+
+        if package.source:
+            src_ref = package.source.bom_ref(SBOMType.SPDX)
+            # source packages might be referenced multiple times, but every
+            # SPDXID must only be used for one entry
+            if src_ref in known_sources:
+                continue
+            known_sources.add(src_ref)
+            data.append(
+                spdx.SPDXPackage(
+                    SPDXID=src_ref,
+                    name=package.source.name,
+                    versionInfo=package.source.version,
+                    primaryPackagePurpose=spdx.SPDX_PACKAGE_PURPOSE_SRC,
+                    supplier=supplier,
+                    downloadLocation=spdx.SPDX_NOASSERTION,
+                    filesAnalyzed=False,
+                    licenseConcluded=spdx.SPDX_NOASSERTION,
+                    licenseDeclared=spdx.SPDX_NOASSERTION,
+                    copyrightText=spdx.SPDX_NOASSERTION,
+                    summary="debian source code package '{}'".format(package.source.name),
+                    externalRefs=[
+                        spdx.SPDXExternalRef(
+                            referenceCategory=spdx.SPDX_REFERENCE_CATEGORY_PKG_MANAGER,
+                            referenceType=spdx.SPDX_REFERENCE_TYPE_PURL,
+                            referenceLocator=package.source.purl(),
+                        )
+                    ],
+                )
+            )
+
+    relationships = []
+    # after we have found all packages we can start to resolve dependencies
+    package_names = {package.name for package in packages}
+    for package in packages:
+        package_ref = package.bom_ref(SBOMType.SPDX)
+        relationships.append(
+            spdx.SPDXRelationship(
+                spdxElementId=package_ref,
+                relatedSpdxElement=distro_ref,
+                relationshipType=spdx.SPDX_RELATIONSHIP_PACKAGE_OF,
+            )
+        )
+        for dep in package.depends or []:
+            # dependencies that are not installed (e.g. unused alternatives)
+            # are skipped
+            if dep.name not in package_names:
+                continue
+            relationships.append(
+                spdx.SPDXRelationship(
+                    spdxElementId=package_ref,
+                    relatedSpdxElement=dep.bom_ref(SBOMType.SPDX),
+                    relationshipType=spdx.SPDX_RELATIONSHIP_DEPENDS_ON,
+                )
+            )
+        if package.source:
+            relationships.append(
+                spdx.SPDXRelationship(
+                    spdxElementId=package.source.bom_ref(SBOMType.SPDX),
+                    relatedSpdxElement=package_ref,
+                    relationshipType=spdx.SPDX_RELATIONSHIP_GENERATES,
+                )
+            )
+    relationships.append(
+        spdx.SPDXRelationship(
+            spdxElementId=spdx.SPDX_REF_DOCUMENT,
+            relatedSpdxElement=distro_ref,
+            relationshipType=spdx.SPDX_RELATIONSHIP_DESCRIBES,
+        )
+    )
+
+    namespace_uuid = d.getVar("SBOM_DOCUMENT_UUID")
+    # the timestamp is formatted with a 'Z' suffix, so it has to be UTC and
+    # must not depend on the time zone of the build host
+    timestamp = datetime.fromtimestamp(
+        int(d.getVar("SOURCE_DATE_EPOCH")), tz=timezone.utc
+    )
+    bom = spdx.SPDXBOM(
+        SPDXID=spdx.SPDX_REF_DOCUMENT,
+        spdxVersion=spdx.SPDX_VERSION,
+        creationInfo=spdx.SPDXCreationInfo(
+            comment="This document has been generated as part of an ISAR build.",
+            creators=[
+                "Tool: ISAR SBOM Generator - {}".format(d.getVar("SBOM_GEN_VERSION"))
+            ],
+            created=timestamp.strftime("%Y-%m-%dT%H:%M:%SZ"),
+        ),
+        name=d.getVar("SBOM_DISTRO_NAME"),
+        dataLicense="CC0-1.0",
+        documentNamespace="{}/{}-{}".format(
+            d.getVar("SBOM_SPDX_NAMESPACE_PREFIX"),
+            d.getVar("SBOM_DISTRO_NAME"),
+            namespace_uuid if namespace_uuid else uuid4(),
+        ),
+        packages=data,
+        relationships=relationships,
+    )
+    return bom
+
+
+def fixup_dict(o):
+    """Convert a BOM object into a dict that can be serialized.
+
+    Attributes without content (None) are dropped. Underscores in attribute
+    names are replaced by dashes, as the field names can not contain dashes
+    in python.
+    """
+    return {
+        attr.replace("_", "-"): content
+        for attr, content in vars(o).items()
+        if content is not None
+    }
+
+
+def generate(d, packages: List[Package], sbom_type: SBOMType, out: str):
+    """Generate a SBOM.
+
+    The SBOM of the given type is created for the packages and written as
+    JSON to the file out. An existing file is overwritten.
+
+    Raises ValueError for an unknown SBOM type.
+    """
+    if sbom_type == SBOMType.CycloneDX:
+        bom = cyclonedx_bom(d, packages)
+    elif sbom_type == SBOMType.SPDX:
+        bom = spdx_bom(d, packages)
+    else:
+        # fail with a clear message instead of an unbound variable below
+        raise ValueError("unsupported SBOM type: {}".format(sbom_type))
+
+    with open(out, "w", encoding="utf-8") as bom_file:
+        # fixup_dict is called for all objects json can not serialize itself
+        json.dump(bom, bom_file, indent=2, default=fixup_dict, sort_keys=True)
diff --git a/meta/lib/sbom_cdx_types.py b/meta/lib/sbom_cdx_types.py
new file mode 100644
index 00000000..4911cc23
--- /dev/null
+++ b/meta/lib/sbom_cdx_types.py
@@ -0,0 +1,82 @@ 
+# This software is part of ISAR.
+# Copyright (C) 2025 Siemens AG
+#
+# SPDX-License-Identifier: MIT
+
+from dataclasses import dataclass
+from typing import List, Optional
+
+# Minimal implementation of some CycloneDX SBOM types.
+# Please mind that (almost) none of these types are complete, they only
+# reflect what was strictly necessary for immediate SBOM creation
+
+# values of the "bomFormat" and "specVersion" fields of the generated BOM
+CDX_BOM_FORMAT = "CycloneDX"
+CDX_SPEC_VERSION = "1.6"
+
+# prefix of all bom-ref values created by the SBOM generator
+CDX_REF_PREFIX = "CDXRef-"
+
+# external reference type used for the homepage of a package
+CDX_PACKAGE_EXTREF_TYPE_WEBSITE = "website"
+
+# component types: packages are added as "library", the generator itself as
+# "application" and the distribution as "operating-system"
+CDX_COMPONENT_TYPE_LIBRARY = "library"
+CDX_COMPONENT_TYPE_APPLICATION = "application"
+CDX_COMPONENT_TYPE_OS = "operating-system"
+
+
+@dataclass
+class CDXDependency:
+    """Dependencies of a single component."""
+
+    # bom-ref of the component
+    ref: str
+    # bom-refs of the components it depends on
+    dependsOn: Optional[List[str]]
+
+
+@dataclass
+class CDXExternalReference:
+    """External reference of a component, e.g. its homepage."""
+
+    url: str
+    # kind of the reference, e.g. CDX_PACKAGE_EXTREF_TYPE_WEBSITE
+    type: str
+    comment: Optional[str] = None
+
+
+@dataclass
+class CDXSupplierContact:
+    """Contact of a supplier, only the email address is supported."""
+
+    email: Optional[str] = None
+
+
+@dataclass
+class CDXSupplier:
+    """Supplier of a component."""
+
+    name: Optional[str] = None
+    # the SBOM generator assigns a list of contacts here
+    contact: Optional[List[CDXSupplierContact]] = None
+
+
+@dataclass
+class CDXComponent:
+    """Component of the BOM: a package, the distribution or a tool."""
+
+    # one of the CDX_COMPONENT_TYPE_* values
+    type: str
+    name: str
+    # serialized as "bom-ref", see fixup_dict() in sbom.py
+    bom_ref: Optional[str] = None
+    supplier: Optional[CDXSupplier] = None
+    version: Optional[str] = None
+    description: Optional[str] = None
+    purl: Optional[str] = None
+    externalReferences: Optional[List[CDXExternalReference]] = None
+    # NOTE(review): never set by the SBOM generator, the homepage is added
+    # as external reference instead
+    homepage: Optional[str] = None
+
+
+@dataclass
+class CDXBOMMetadataTool:
+    """Tools used to create the BOM, given as a list of components."""
+
+    components: Optional[List[CDXComponent]]
+
+
+@dataclass
+class CDXBOMMetadata:
+    """Metadata of the BOM."""
+
+    # creation time of the BOM
+    timestamp: Optional[str] = None
+    # component described by the BOM, for the SBOM generator the distribution
+    component: Optional[CDXComponent] = None
+    # a single object that holds the list of tool components
+    tools: Optional[CDXBOMMetadataTool] = None
+
+
+@dataclass
+class CDXBOM:
+    """Incomplete top-level CycloneDX BOM document."""
+
+    bomFormat: str
+    specVersion: str
+    # "urn:uuid:" followed by the document UUID
+    serialNumber: Optional[str] = None
+    # version of the BOM document itself, an integer
+    version: Optional[int] = None
+    metadata: Optional[CDXBOMMetadata] = None
+    components: Optional[List[CDXComponent]] = None
+    dependencies: Optional[List[CDXDependency]] = None
diff --git a/meta/lib/sbom_spdx_types.py b/meta/lib/sbom_spdx_types.py
new file mode 100644
index 00000000..efd7cc0c
--- /dev/null
+++ b/meta/lib/sbom_spdx_types.py
@@ -0,0 +1,95 @@ 
+# This software is part of ISAR.
+# Copyright (C) 2025 Siemens AG
+#
+# SPDX-License-Identifier: MIT
+
+from dataclasses import dataclass
+from typing import List, Optional
+
+# Minimal implementation of some SPDX SBOM types.
+# Please mind that (almost) none of these types are complete, they only
+# reflect what was strictly necessary for immediate SBOM creation
+
+# value of the "spdxVersion" field of the generated BOM
+SPDX_VERSION = "SPDX-2.3"
+
+# prefix of all SPDXIDs created by the SBOM generator
+SPDX_REF_PREFIX = "SPDXRef-"
+
+# SPDXID of the document itself
+SPDX_REF_DOCUMENT = "SPDXRef-DOCUMENT"
+
+# package purposes: binary packages are added as "LIBRARY", the distribution
+# as "OPERATING_SYSTEM" and source packages as "SOURCE"
+SPDX_PACKAGE_PURPOSE_LIBRARY = "LIBRARY"
+SPDX_PACKAGE_PURPOSE_OS = "OPERATING_SYSTEM"
+SPDX_PACKAGE_PURPOSE_SRC = "SOURCE"
+
+# used for all fields the SBOM generator has no information about
+SPDX_NOASSERTION = "NOASSERTION"
+
+# relationship types used by the SBOM generator
+SPDX_RELATIONSHIP_DEPENDS_ON = "DEPENDS_ON"
+SPDX_RELATIONSHIP_PACKAGE_OF = "PACKAGE_OF"
+SPDX_RELATIONSHIP_GENERATES = "GENERATES"
+SPDX_RELATIONSHIP_DESCRIBES = "DESCRIBES"
+
+# category and type of the external reference that holds the PURL
+SPDX_REFERENCE_CATEGORY_PKG_MANAGER = "PACKAGE_MANAGER"
+SPDX_REFERENCE_TYPE_PURL = "purl"
+
+# cues for an organization in the maintainer name, compared against the
+# lowercased name; without a match the maintainer is treated as a person
+SPDX_SUPPLIER_ORG_CUE = [
+    "maintainers",
+    "group",
+    "developers",
+    "team",
+    "project",
+    "task force",
+    "strike force",
+    "packagers",
+]
+
+
+@dataclass
+class SPDXRelationship:
+    """Relationship between two SPDX elements."""
+
+    # SPDXID of the element the relationship starts at
+    spdxElementId: str
+    # SPDXID of the related element
+    relatedSpdxElement: str
+    # one of the SPDX_RELATIONSHIP_* values
+    relationshipType: str
+
+
+@dataclass
+class SPDXExternalRef:
+    """External reference of a package, used for the PURL."""
+
+    # e.g. SPDX_REFERENCE_CATEGORY_PKG_MANAGER
+    referenceCategory: str
+    # e.g. SPDX_REFERENCE_TYPE_PURL
+    referenceType: str
+    # the reference itself, e.g. the PURL of the package
+    referenceLocator: str
+
+
+@dataclass
+class SPDXPackage:
+    """Incomplete SPDX package.
+
+    Used for binary packages, source packages and the distribution.
+    """
+
+    SPDXID: str
+    name: str
+    downloadLocation: str
+    filesAnalyzed: Optional[bool] = False
+    versionInfo: Optional[str] = None
+    homepage: Optional[str] = None
+    # one of the SPDX_PACKAGE_PURPOSE_* values
+    primaryPackagePurpose: Optional[str] = None
+    # "Person: ..." or "Organization: ..."
+    supplier: Optional[str] = None
+    licenseConcluded: Optional[str] = None
+    licenseDeclared: Optional[str] = None
+    copyrightText: Optional[str] = None
+    summary: Optional[str] = None
+    externalRefs: Optional[List[SPDXExternalRef]] = None
+
+
+@dataclass
+class SPDXCreationInfo:
+    """Information about the creation of the SPDX document."""
+
+    # creation time of the document
+    created: str
+    comment: Optional[str] = None
+    # e.g. "Tool: <name> - <version>"; None is the default, so it has to be
+    # optional
+    creators: Optional[List[str]] = None
+
+
+@dataclass
+class SPDXBOM:
+    """Incomplete BOM as of SPDX spec v2.3."""
+
+    # SPDXID of the document, the SBOM generator uses SPDX_REF_DOCUMENT
+    SPDXID: str
+    spdxVersion: str
+    creationInfo: SPDXCreationInfo
+    name: str
+    dataLicense: str
+    # namespace of the document, built from prefix, name and UUID
+    documentNamespace: str
+    packages: List[SPDXPackage]
+    relationships: List[SPDXRelationship]