[1/1] deduplicate apt sources.list based on filenames

Message ID 20230726061457.2297344-1-felix.moessbauer@siemens.com
State Accepted, archived
Headers show
Series [1/1] deduplicate apt sources.list based on filenames | expand

Commit Message

MOESSBAUER, Felix July 26, 2023, 6:14 a.m. UTC
When combining layers, it can happen that the same DISTRO_APT_SOURCES
entry is added multiple times. This creates duplicate entries in the
bootstrap.list, which is considered invalid by apt. To solve this, we
deduplicate the values in the [HOST_]DISTRO_APT_SOURCES variable.
In addition, this also avoids adding the same SRC_URI entry twice.

Note that this only solves the trivial case where the duplicate entries
come from the same file. However, if duplicate entries are added
from multiple files with different names, the :remove syntax can be
used to remove them for the affected targets.

Signed-off-by: Felix Moessbauer <felix.moessbauer@siemens.com>
---
 meta/recipes-core/isar-bootstrap/isar-bootstrap.inc | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

Comments

Quirin Gylstorff July 26, 2023, 8:59 a.m. UTC | #1
On 7/26/23 08:14, 'Felix Moessbauer' via isar-users wrote:
> When combining layers, it can happen that the same DISTRO_APT_SOURCES
> entry is added multiple times. This creates duplicate entries in the
> bootstrap.list, which is considered invalid by apt. To solve this, we
> deduplicate the values in the [HOST_]DISTRO_APT_SOURCES variable.
> In addition, this also avoids adding the same SRC_URI entry twice.
> 
> Note, that this only solves the trivial case that the duplicate entries
> come from the same file. However in case duplicate entries are added
> from multiple files with different names, the :remove syntax can be
> used to remove these for the affected targets.
> 
> Signed-off-by: Felix Moessbauer <felix.moessbauer@siemens.com>
> ---
>   meta/recipes-core/isar-bootstrap/isar-bootstrap.inc | 13 ++++++-------
>   1 file changed, 6 insertions(+), 7 deletions(-)
> 
> diff --git a/meta/recipes-core/isar-bootstrap/isar-bootstrap.inc b/meta/recipes-core/isar-bootstrap/isar-bootstrap.inc
> index b94ae0bd..8af73a9b 100644
> --- a/meta/recipes-core/isar-bootstrap/isar-bootstrap.inc
> +++ b/meta/recipes-core/isar-bootstrap/isar-bootstrap.inc
> @@ -59,8 +59,8 @@ python () {
>           filename = os.path.relpath(fetcher.localpath(key), topdir)
>           d.appendVar("THIRD_PARTY_APT_KEYFILES", " ${TOPDIR}/%s" % filename)
>   
> -    distro_apt_sources = d.getVar(d.getVar("DISTRO_VARS_PREFIX") + "DISTRO_APT_SOURCES") or ""
> -    for file in distro_apt_sources.split():
> +    distro_apt_sources = get_aptsources_list(d)
> +    for file in distro_apt_sources:
>           d.appendVar("SRC_URI", " file://%s" % file)
>   
>       distro_apt_preferences = d.getVar(d.getVar("DISTRO_VARS_PREFIX") + "DISTRO_APT_PREFERENCES") or ""
> @@ -149,10 +149,11 @@ def aggregate_aptsources_list(d, file_list, file_out):
>   
>   def get_aptsources_list(d):
>       import errno
> +    from collections import OrderedDict
>       apt_sources_var = d.getVar("DISTRO_VARS_PREFIX") + "DISTRO_APT_SOURCES"
> -    list = (d.getVar(apt_sources_var) or "").split()
> +    apt_sources_list = list(OrderedDict.fromkeys((d.getVar(apt_sources_var) or "").split()))
Why not use `set()` if the goal is to remove all duplicates?

Quirin
>       ret = []
> -    for p in list:
> +    for p in apt_sources_list:
>           try:
>               f = bb.parse.resolve_file(p, d)
>               ret.append(f)
> @@ -251,9 +252,7 @@ python do_apt_config_prepare() {
>   
>       apt_sources_out = d.getVar("APTSRCS")
>       apt_sources_init_out = d.getVar("APTSRCS_INIT")
> -    apt_sources_list = (
> -        d.getVar(d.getVar("DISTRO_VARS_PREFIX") + "DISTRO_APT_SOURCES") or ""
> -    ).split()
> +    apt_sources_list = get_aptsources_list(d)
>   
>       aggregate_files(d, apt_sources_list, apt_sources_init_out)
>       aggregate_aptsources_list(d, apt_sources_list, apt_sources_out)
MOESSBAUER, Felix July 27, 2023, 3:26 a.m. UTC | #2
On Wed, 2023-07-26 at 10:59 +0200, 'Gylstorff Quirin' via isar-users
wrote:
> 
> 
> On 7/26/23 08:14, 'Felix Moessbauer' via isar-users wrote:
> > When combining layers, it can happen that the same
> > DISTRO_APT_SOURCES
> > entry is added multiple times. This creates duplicate entries in
> > the
> > bootstrap.list, which is considered invalid by apt. To solve this,
> > we
> > deduplicate the values in the [HOST_]DISTRO_APT_SOURCES variable.
> > In addition, this also avoids adding the same SRC_URI entry twice.
> > 
> > Note, that this only solves the trivial case that the duplicate
> > entries
> > come from the same file. However in case duplicate entries are
> > added
> > from multiple files with different names, the :remove syntax can be
> > used to remove these for the affected targets.
> > 
> > Signed-off-by: Felix Moessbauer <felix.moessbauer@siemens.com>
> > ---
> >   meta/recipes-core/isar-bootstrap/isar-bootstrap.inc | 13 ++++++--
> > -----
> >   1 file changed, 6 insertions(+), 7 deletions(-)
> > 
> > diff --git a/meta/recipes-core/isar-bootstrap/isar-bootstrap.inc
> > b/meta/recipes-core/isar-bootstrap/isar-bootstrap.inc
> > index b94ae0bd..8af73a9b 100644
> > --- a/meta/recipes-core/isar-bootstrap/isar-bootstrap.inc
> > +++ b/meta/recipes-core/isar-bootstrap/isar-bootstrap.inc
> > @@ -59,8 +59,8 @@ python () {
> >           filename = os.path.relpath(fetcher.localpath(key),
> > topdir)
> >           d.appendVar("THIRD_PARTY_APT_KEYFILES", " ${TOPDIR}/%s" %
> > filename)
> >   
> > -    distro_apt_sources = d.getVar(d.getVar("DISTRO_VARS_PREFIX") +
> > "DISTRO_APT_SOURCES") or ""
> > -    for file in distro_apt_sources.split():
> > +    distro_apt_sources = get_aptsources_list(d)
> > +    for file in distro_apt_sources:
> >           d.appendVar("SRC_URI", " file://%s" % file)
> >   
> >       distro_apt_preferences =
> > d.getVar(d.getVar("DISTRO_VARS_PREFIX") + "DISTRO_APT_PREFERENCES")
> > or ""
> > @@ -149,10 +149,11 @@ def aggregate_aptsources_list(d, file_list,
> > file_out):
> >   
> >   def get_aptsources_list(d):
> >       import errno
> > +    from collections import OrderedDict
> >       apt_sources_var = d.getVar("DISTRO_VARS_PREFIX") +
> > "DISTRO_APT_SOURCES"
> > -    list = (d.getVar(apt_sources_var) or "").split()
> > +    apt_sources_list =
> > list(OrderedDict.fromkeys((d.getVar(apt_sources_var) or
> > "").split()))
> Why not use `set()` if the goal is to remove all duplicates?

A set() does not preserve the order of the entries. However, the
order might be relevant, as it determines the order of the entries in
the apt sources list. With the OrderedDict approach, the order is
preserved.

Felix

> 
> Quirin
> >       ret = []
> > -    for p in list:
> > +    for p in apt_sources_list:
> >           try:
> >               f = bb.parse.resolve_file(p, d)
> >               ret.append(f)
> > @@ -251,9 +252,7 @@ python do_apt_config_prepare() {
> >   
> >       apt_sources_out = d.getVar("APTSRCS")
> >       apt_sources_init_out = d.getVar("APTSRCS_INIT")
> > -    apt_sources_list = (
> > -        d.getVar(d.getVar("DISTRO_VARS_PREFIX") +
> > "DISTRO_APT_SOURCES") or ""
> > -    ).split()
> > +    apt_sources_list = get_aptsources_list(d)
> >   
> >       aggregate_files(d, apt_sources_list, apt_sources_init_out)
> >       aggregate_aptsources_list(d, apt_sources_list,
> > apt_sources_out)
>
Quirin Gylstorff July 27, 2023, 9:51 a.m. UTC | #3
On 7/27/23 05:26, MOESSBAUER, Felix (T CED INW-CN) wrote:
> On Wed, 2023-07-26 at 10:59 +0200, 'Gylstorff Quirin' via isar-users
> wrote:
>>
>>
>> On 7/26/23 08:14, 'Felix Moessbauer' via isar-users wrote:
>>> When combining layers, it can happen that the same
>>> DISTRO_APT_SOURCES
>>> entry is added multiple times. This creates duplicate entries in
>>> the
>>> bootstrap.list, which is considered invalid by apt. To solve this,
>>> we
>>> deduplicate the values in the [HOST_]DISTRO_APT_SOURCES variable.
>>> In addition, this also avoids adding the same SRC_URI entry twice.
>>>
>>> Note, that this only solves the trivial case that the duplicate
>>> entries
>>> come from the same file. However in case duplicate entries are
>>> added
>>> from multiple files with different names, the :remove syntax can be
>>> used to remove these for the affected targets.
>>>
>>> Signed-off-by: Felix Moessbauer <felix.moessbauer@siemens.com>
>>> ---
>>>    meta/recipes-core/isar-bootstrap/isar-bootstrap.inc | 13 ++++++--
>>> -----
>>>    1 file changed, 6 insertions(+), 7 deletions(-)
>>>
>>> diff --git a/meta/recipes-core/isar-bootstrap/isar-bootstrap.inc
>>> b/meta/recipes-core/isar-bootstrap/isar-bootstrap.inc
>>> index b94ae0bd..8af73a9b 100644
>>> --- a/meta/recipes-core/isar-bootstrap/isar-bootstrap.inc
>>> +++ b/meta/recipes-core/isar-bootstrap/isar-bootstrap.inc
>>> @@ -59,8 +59,8 @@ python () {
>>>            filename = os.path.relpath(fetcher.localpath(key),
>>> topdir)
>>>            d.appendVar("THIRD_PARTY_APT_KEYFILES", " ${TOPDIR}/%s" %
>>> filename)
>>>
>>> -    distro_apt_sources = d.getVar(d.getVar("DISTRO_VARS_PREFIX") +
>>> "DISTRO_APT_SOURCES") or ""
>>> -    for file in distro_apt_sources.split():
>>> +    distro_apt_sources = get_aptsources_list(d)
>>> +    for file in distro_apt_sources:
>>>            d.appendVar("SRC_URI", " file://%s" % file)
>>>
>>>        distro_apt_preferences =
>>> d.getVar(d.getVar("DISTRO_VARS_PREFIX") + "DISTRO_APT_PREFERENCES")
>>> or ""
>>> @@ -149,10 +149,11 @@ def aggregate_aptsources_list(d, file_list,
>>> file_out):
>>>
>>>    def get_aptsources_list(d):
>>>        import errno
>>> +    from collections import OrderedDict
>>>        apt_sources_var = d.getVar("DISTRO_VARS_PREFIX") +
>>> "DISTRO_APT_SOURCES"
>>> -    list = (d.getVar(apt_sources_var) or "").split()
>>> +    apt_sources_list =
>>> list(OrderedDict.fromkeys((d.getVar(apt_sources_var) or
>>> "").split()))
>> Why not use `set()` if the goal is to remove all duplicates?
> 
> The set() does not preserve the order of the entries. However, the
> order might be relevant as it influences the order of the entries in
> the apt sources list. With the OrderedDict approach, the order is
> preserved.
> 
> Felix

Thanks, the patch looks good to me.
Quirin
Uladzimir Bely Aug. 2, 2023, 9:30 p.m. UTC | #4
On Wed, 2023-07-26 at 06:14 +0000, 'Felix Moessbauer' via isar-users
wrote:
> When combining layers, it can happen that the same DISTRO_APT_SOURCES
> entry is added multiple times. This creates duplicate entries in the
> bootstrap.list, which is considered invalid by apt. To solve this, we
> deduplicate the values in the [HOST_]DISTRO_APT_SOURCES variable.
> In addition, this also avoids adding the same SRC_URI entry twice.
> 
> Note, that this only solves the trivial case that the duplicate
> entries
> come from the same file. However in case duplicate entries are added
> from multiple files with different names, the :remove syntax can be
> used to remove these for the affected targets.
> 
> Signed-off-by: Felix Moessbauer <felix.moessbauer@siemens.com>
> ---
>  meta/recipes-core/isar-bootstrap/isar-bootstrap.inc | 13 ++++++-----
> --
>  1 file changed, 6 insertions(+), 7 deletions(-)
> 
> diff --git a/meta/recipes-core/isar-bootstrap/isar-bootstrap.inc
> b/meta/recipes-core/isar-bootstrap/isar-bootstrap.inc
> index b94ae0bd..8af73a9b 100644
> --- a/meta/recipes-core/isar-bootstrap/isar-bootstrap.inc
> +++ b/meta/recipes-core/isar-bootstrap/isar-bootstrap.inc
> @@ -59,8 +59,8 @@ python () {
>          filename = os.path.relpath(fetcher.localpath(key), topdir)
>          d.appendVar("THIRD_PARTY_APT_KEYFILES", " ${TOPDIR}/%s" %
> filename)
>  
> -    distro_apt_sources = d.getVar(d.getVar("DISTRO_VARS_PREFIX") +
> "DISTRO_APT_SOURCES") or ""
> -    for file in distro_apt_sources.split():
> +    distro_apt_sources = get_aptsources_list(d)
> +    for file in distro_apt_sources:
>          d.appendVar("SRC_URI", " file://%s" % file)
>  
>      distro_apt_preferences = d.getVar(d.getVar("DISTRO_VARS_PREFIX")
> + "DISTRO_APT_PREFERENCES") or ""
> @@ -149,10 +149,11 @@ def aggregate_aptsources_list(d, file_list,
> file_out):
>  
>  def get_aptsources_list(d):
>      import errno
> +    from collections import OrderedDict
>      apt_sources_var = d.getVar("DISTRO_VARS_PREFIX") +
> "DISTRO_APT_SOURCES"
> -    list = (d.getVar(apt_sources_var) or "").split()
> +    apt_sources_list =
> list(OrderedDict.fromkeys((d.getVar(apt_sources_var) or "").split()))
>      ret = []
> -    for p in list:
> +    for p in apt_sources_list:
>          try:
>              f = bb.parse.resolve_file(p, d)
>              ret.append(f)
> @@ -251,9 +252,7 @@ python do_apt_config_prepare() {
>  
>      apt_sources_out = d.getVar("APTSRCS")
>      apt_sources_init_out = d.getVar("APTSRCS_INIT")
> -    apt_sources_list = (
> -        d.getVar(d.getVar("DISTRO_VARS_PREFIX") +
> "DISTRO_APT_SOURCES") or ""
> -    ).split()
> +    apt_sources_list = get_aptsources_list(d)
>  
>      aggregate_files(d, apt_sources_list, apt_sources_init_out)
>      aggregate_aptsources_list(d, apt_sources_list, apt_sources_out)
> -- 
> 2.34.1
> 

Applied to next, thanks.

Patch

diff --git a/meta/recipes-core/isar-bootstrap/isar-bootstrap.inc b/meta/recipes-core/isar-bootstrap/isar-bootstrap.inc
index b94ae0bd..8af73a9b 100644
--- a/meta/recipes-core/isar-bootstrap/isar-bootstrap.inc
+++ b/meta/recipes-core/isar-bootstrap/isar-bootstrap.inc
@@ -59,8 +59,8 @@  python () {
         filename = os.path.relpath(fetcher.localpath(key), topdir)
         d.appendVar("THIRD_PARTY_APT_KEYFILES", " ${TOPDIR}/%s" % filename)
 
-    distro_apt_sources = d.getVar(d.getVar("DISTRO_VARS_PREFIX") + "DISTRO_APT_SOURCES") or ""
-    for file in distro_apt_sources.split():
+    distro_apt_sources = get_aptsources_list(d)
+    for file in distro_apt_sources:
         d.appendVar("SRC_URI", " file://%s" % file)
 
     distro_apt_preferences = d.getVar(d.getVar("DISTRO_VARS_PREFIX") + "DISTRO_APT_PREFERENCES") or ""
@@ -149,10 +149,11 @@  def aggregate_aptsources_list(d, file_list, file_out):
 
 def get_aptsources_list(d):
     import errno
+    from collections import OrderedDict
     apt_sources_var = d.getVar("DISTRO_VARS_PREFIX") + "DISTRO_APT_SOURCES"
-    list = (d.getVar(apt_sources_var) or "").split()
+    apt_sources_list = list(OrderedDict.fromkeys((d.getVar(apt_sources_var) or "").split()))
     ret = []
-    for p in list:
+    for p in apt_sources_list:
         try:
             f = bb.parse.resolve_file(p, d)
             ret.append(f)
@@ -251,9 +252,7 @@  python do_apt_config_prepare() {
 
     apt_sources_out = d.getVar("APTSRCS")
     apt_sources_init_out = d.getVar("APTSRCS_INIT")
-    apt_sources_list = (
-        d.getVar(d.getVar("DISTRO_VARS_PREFIX") + "DISTRO_APT_SOURCES") or ""
-    ).split()
+    apt_sources_list = get_aptsources_list(d)
 
     aggregate_files(d, apt_sources_list, apt_sources_init_out)
     aggregate_aptsources_list(d, apt_sources_list, apt_sources_out)