-
Notifications
You must be signed in to change notification settings - Fork 41
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: load deb822 sources; use add-apt-repository to add #137
Changes from 17 commits
7472c51
66f42cd
20b0f9e
0e6c8b3
8dbd158
9d9c9fa
5a7c22e
bf26ce0
d43074b
484b106
86e78b5
2dd9dcd
6c39d86
246bd56
8717652
9dfadc1
c44a4a5
2df185e
c963280
7fa6295
86ebdd1
c364111
71c5f90
afc85b6
3b7872a
e24001a
497bf0e
7c70fa8
10df1e8
2b726c4
9d3230a
620cb15
eafe869
2945765
1232175
569c39e
b081253
da057f5
08aa90d
8008a97
58c61be
89014aa
d0674e9
3745426
865aad8
b993c54
9ce3e48
2b7cb4e
709e2c7
2820497
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -102,14 +102,15 @@ | |
|
||
import fileinput | ||
import glob | ||
import itertools | ||
import logging | ||
import os | ||
import re | ||
import subprocess | ||
from collections.abc import Mapping | ||
from enum import Enum | ||
from subprocess import PIPE, CalledProcessError, check_output | ||
from typing import Iterable, List, Optional, Tuple, Union | ||
from typing import Dict, Iterable, Iterator, List, Optional, Tuple, Union | ||
from urllib.parse import urlparse | ||
|
||
logger = logging.getLogger(__name__) | ||
|
@@ -122,7 +123,7 @@ | |
|
||
# Increment this PATCH version before using `charmcraft publish-lib` or reset | ||
# to 0 if you are raising the major API version | ||
LIBPATCH = 14 | ||
LIBPATCH = 15 | ||
|
||
|
||
VALID_SOURCE_TYPES = ("deb", "deb-src") | ||
|
@@ -1198,7 +1199,7 @@ class RepositoryMapping(Mapping): | |
""" | ||
|
||
def __init__(self): | ||
self._repository_map = {} | ||
self._repository_map: Dict[str, DebianRepository] = {} | ||
# Repositories that we're adding -- used to implement mode param | ||
self.default_file = "/etc/apt/sources.list" | ||
|
||
|
@@ -1210,6 +1211,9 @@ def __init__(self): | |
for file in glob.iglob("/etc/apt/sources.list.d/*.list"): | ||
self.load(file) | ||
|
||
for file in glob.iglob("/etc/apt/sources.list.d/*.sources"): | ||
self.load_deb822(file) | ||
|
||
def __contains__(self, key: str) -> bool: | ||
"""Magic method for checking presence of repo in mapping.""" | ||
return key in self._repository_map | ||
|
@@ -1231,13 +1235,13 @@ def __setitem__(self, repository_uri: str, repository: DebianRepository) -> None | |
self._repository_map[repository_uri] = repository | ||
|
||
def load(self, filename: str): | ||
"""Load a repository source file into the cache. | ||
"""Load a one-line-style format repository source file into the cache. | ||
|
||
Args: | ||
filename: the path to the repository file | ||
""" | ||
parsed = [] | ||
skipped = [] | ||
parsed: List[int] = [] | ||
skipped: List[int] = [] | ||
with open(filename, "r") as f: | ||
for n, line in enumerate(f): | ||
try: | ||
|
@@ -1314,6 +1318,203 @@ def _parse(line: str, filename: str) -> DebianRepository: | |
else: | ||
raise InvalidSourceError("An invalid sources line was found in %s!", filename) | ||
|
||
def load_deb822(self, filename: str) -> None: | ||
"""Load a deb822 format repository source file into the cache. | ||
|
||
Args: | ||
filename: the path to the repository file | ||
|
||
In contrast to one-line-style, the deb822 format specifies a repository | ||
using a multi-line paragraph. Paragraphs are separated by whitespace, | ||
and each definition consists of lines that are either key: value pairs, | ||
or continuations of the previous value. | ||
|
||
Read more about the deb822 format here: | ||
https://manpages.ubuntu.com/manpages/noble/en/man5/sources.list.5.html | ||
For instance, ubuntu 24.04 (noble) lists its sources using deb822 style in: | ||
/etc/apt/sources.list.d/ubuntu.sources | ||
|
||
The semantics of `load_deb822` are slightly different from those of `load`: | ||
`load` calls `_parse`, which reads a commented out line as an entry that is not enabled | ||
`load_deb822` strips out comments entirely when parsing a file into paragraphs, and | ||
assumes that comments have been removed when parsing individual paragraphs/entry, | ||
instead only reading the 'Enabled' key to determine if an entry is enabled | ||
""" | ||
with open(filename, "r") as f: | ||
repos, errors = self._parse_deb822_lines(f, filename=filename) | ||
|
||
for repo in repos: | ||
repo_identifier = "{}-{}-{}".format(repo.repotype, repo.uri, repo.release) | ||
self._repository_map[repo_identifier] = repo | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wonder if there should be a new class - e.g. something like Reason saying is that I think the current There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this is actually a good case for inheritance for maximum backwards compatibility. A EDIT: though maybe backwards compatibility in this way isn't possible since a deb822 source can have multiple repotypes, uris, and releases, which a This is perhaps a good point to think about how to handle a Here are some options for how we could handle the inline key case.
|
||
|
||
if errors: | ||
logger.debug( | ||
"the following %d error(s) were encountered when reading deb822 format sources:\n%s", | ||
len(errors), | ||
"\n".join(str(e) for e in errors), | ||
) | ||
|
||
if repos: | ||
logger.info("parsed %d apt package repositories", len(repos)) | ||
else: | ||
raise InvalidSourceError("all repository lines in '{}' were invalid!".format(filename)) | ||
|
||
@classmethod
def _parse_deb822_lines(
    cls,
    lines: Iterable[str],
    filename: str = "",
) -> Tuple[List[DebianRepository], List[InvalidSourceError]]:
    """Split deb822 lines into paragraphs and parse each paragraph in turn.

    Args:
        lines: the raw lines of a deb822 format file
        filename: the name of the file being read (passed on to the paragraph parser)

    Returns:
        A pair ``(repositories, errors)``: every repository parsed from a valid
        paragraph, and one InvalidSourceError for each paragraph that failed to parse.
    """
    parsed: List[DebianRepository] = []
    failures: List[InvalidSourceError] = []
    for stanza in cls._iter_deb822_paragraphs(lines):
        try:
            found = cls._parse_deb822_paragraph(stanza, filename=filename)
        except InvalidSourceError as exc:
            # A bad paragraph is recorded, not fatal: keep parsing the rest of the file.
            failures.append(exc)
            continue
        parsed.extend(found)
    return parsed, failures
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I know regexes invoke a lot of opinions among software engineers, but I found this regex to be pretty useful for parsing deb822 stanzas. You can see it in action on regex101 - https://regex101.com/r/UriO7l/1: _deb822_matcher = re.compile(
r"""
(?:Enabled:\s*)?(?P<enabled>.+)?\s?
Types:\s*(?P<repo_types>.{3,})\s
URIs:\s*(?P<uris>.+)\s
Suites:\s*(?P<suites>.+)\s
(?:Components:\s*)?(?P<components>.+)?\s?
(?:Signed-By:\s*)?(?P<gpg_key>.+)?\s?
(?P<options>((.*:\s*)(.+)\s?)*)?
""",
re.VERBOSE,
) Then you can glom all the stanzas in a for stanza in _deb822_matcher.finditer(content):
groups = stanza.groupdict()
enabled = groups.pop("enabled")
repo_types = groups.pop("repo_types").split()
uris = groups.pop("uris").split()
suites = groups.pop("suites").split()
components = groups.pop("components").split()
gpg_key = groups.pop("gpg_key")
raw_options = groups.pop("options")
if enabled not in ["yes", "no", None]:
raise InvalidSourceError("...")
else:
enabled = True if enabled == "yes" or enabled is None else False
if len(suites) == 1 and suites[0].endswith("/") and components is not None:
raise InvalidSourceError("...")
elif components is None:
raise InvalidSourceError("...")
options = {}
for option in raw_options.splitlines():
k, v = option.split(":", maxsplit=1)
options[k] = v.strip() You do lose some of the granularity you're getting such as the line numbers that you're parsing, but there is the added benefit of not needing to maintain a custom file parser. Trade one form of complexity for another. The one benefit of the deb822 format is that it's easier for both humans to understand and for machines to manipulate 😅 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ahh, regexes ... that's how the one-line-style entries are parsed. I certainly wouldn't feel super happy writing a regex like that one from scratch, but maybe it's better to put the complexity in a standard (if controversial) format than using a custom parser. That said, there are a couple of cases that this PR's implementation handles differently.
I don't bring these up to nitpick the regex, but because I'm not sure I'd feel super comfortable adding those features to it, and the more features you add, the more the regex scares me haha There's also the need to consider what to do with fully commented out entries ... in my implementation I just went with stripping out all comments entirely. I think allowing a commented out entry(paragraph) to be read in as a disabled entry opens up a lot of complexity even with a custom parser. I've added some extra cases for the regex here if you want to take a look https://regex101.com/r/7fPKrM/1 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Note in case you read the previous comment in emails, I've edited it a bit |
||
|
||
@staticmethod
def _iter_deb822_paragraphs(lines: Iterable[str]) -> Iterator[List[Tuple[int, str]]]:
    """Group the lines of a deb822 format file into comment-free paragraphs.

    Each yielded paragraph is a list of ``(line_number, text)`` pairs. Line numbers
    are 0-indexed positions in ``lines`` (so stripped comment lines still count),
    and ``text`` has everything from the first ``#`` onwards removed and trailing
    whitespace stripped, while leading indentation is kept.
    """
    pending: List[Tuple[int, str]] = []
    for index, raw in enumerate(lines):  # 0 indexed line numbers, following `load`
        if raw.strip():
            text = raw.split("#", 1)[0]
            if text.strip():  # lines that are only a comment contribute nothing
                pending.append((index, text.rstrip()))  # preserve indent
        elif pending:
            # a blank line closes the paragraph collected so far
            yield pending
            pending = []
    if pending:
        yield pending
|
||
@classmethod
def _parse_deb822_paragraph(
    cls,
    lines: List[Tuple[int, str]],
    filename: str = "",
) -> List[DebianRepository]:
    """Parse a list of numbered lines forming a deb822 style repository definition.

    One DebianRepository is produced for every combination of the paragraph's
    'Types', 'URIs' and 'Suites' values. Fields other than 'Enabled', 'Signed-By',
    'Types', 'URIs', 'Suites' and 'Components' are passed through as ``options``.

    NOTE(review): reviewers of this change questioned whether deb822 paragraphs
    should be flattened into one-line-style DebianRepository objects at all. A
    paragraph can hold several types, URIs and suites, whereas a DebianRepository
    is expected to have one of each, and disabling a deb822 source requires
    commenting out the whole stanza rather than a single line. Whether
    commented-out paragraphs should be read as disabled entries was left open.

    Args:
        lines: a list of numbered lines forming a deb822 paragraph
        filename: the name of the file being read (for DebianRepository and errors)

    Returns:
        The repositories described by the paragraph.

    Raises:
        InvalidSourceError if the source type is unknown or contains malformed entries
    """
    options, line_numbers = cls._get_deb822_options(lines)

    enabled_field = options.pop("Enabled", "yes")
    if enabled_field == "yes":
        enabled = True
    elif enabled_field == "no":
        enabled = False
    else:
        raise InvalidSourceError(
            (
                "Bad value '{value}' for entry 'Enabled' (line {enabled_line})"
                " in file {file}. If 'Enabled' is present it must be one of"
                " yes or no (if absent it defaults to yes)."
            ).format(
                value=enabled_field,
                enabled_line=line_numbers["Enabled"],
                file=filename,
            )
        )

    gpg_key = options.pop("Signed-By", "")

    try:
        repotypes = options.pop("Types").split()
        uris = options.pop("URIs").split()
        suites = options.pop("Suites").split()
    except KeyError as e:
        [key] = e.args
        # Chain the KeyError so the traceback shows which lookup failed.
        raise InvalidSourceError(
            "Missing key '{key}' for entry starting on line {line} in {file}.".format(
                key=key,
                line=min(line_numbers.values()),
                file=filename,
            )
        ) from e

    components: List[str]
    if len(suites) == 1 and suites[0].endswith("/"):
        # A single suite ending in "/" is a path relative to the URI, which
        # must not be combined with components.
        if "Components" in options:
            raise InvalidSourceError(
                (
                    "Since 'Suites' (line {suites_line}) specifies"
                    " a path relative to 'URIs' (line {uris_line}),"
                    " 'Components' (line {components_line}) must be omitted"
                    " (in file {file})."
                ).format(
                    suites_line=line_numbers["Suites"],
                    uris_line=line_numbers["URIs"],
                    components_line=line_numbers["Components"],
                    file=filename,
                )
            )
        components = []
    else:
        if "Components" not in options:
            raise InvalidSourceError(
                (
                    "Since 'Suites' (line {suites_line}) does not specify"
                    " a path relative to 'URIs' (line {uris_line}),"
                    " 'Components' must be present in this paragraph"
                    " (in file {file})."
                ).format(
                    suites_line=line_numbers["Suites"],
                    uris_line=line_numbers["URIs"],
                    file=filename,
                )
            )
        components = options.pop("Components").split()

    return [
        DebianRepository(
            enabled=enabled,
            repotype=repotype,
            uri=uri,
            release=suite,
            # Each repository gets its own copies so that mutating one entry's
            # groups/options cannot silently change its siblings.
            groups=list(components),
            filename=filename,
            gpg_key_filename=gpg_key,  # TODO: gpg_key can be a literal key, not just a filename
            options=dict(options),
        )
        for repotype, uri, suite in itertools.product(repotypes, uris, suites)
    ]
|
||
@staticmethod
def _get_deb822_options(
    lines: Iterable[Tuple[int, str]]
) -> Tuple[Dict[str, str], Dict[str, int]]:
    """Collect the ``key: value`` fields of a single deb822 paragraph.

    Args:
        lines: numbered lines of one paragraph, with comments already stripped

    Returns:
        A pair ``(options, line_numbers)``. ``options`` maps each field name to its
        value, with continuation lines appended on new lines (indent preserved).
        ``line_numbers`` maps each field name to the number of the line its key
        appeared on.

    Raises:
        InvalidSourceError: if the paragraph begins with a continuation line
    """
    parts: Dict[str, List[str]] = {}
    line_numbers: Dict[str, int] = {}
    current = None
    for n, line in lines:
        assert "#" not in line  # internal invariant: comments should be stripped out
        # Raised in review ("Are tabs acceptable too?"): yes -- a deb822
        # continuation line may start with either a space or a tab.
        if line.startswith((" ", "\t")):  # continuation of previous key's value
            if current is None:
                # Malformed input, not a programming error: report it the same
                # way as every other bad paragraph so the caller can collect it.
                raise InvalidSourceError(
                    "Continuation line {line} does not follow any key.".format(line=n)
                )
            parts[current].append(line.rstrip())  # preserve indent
            continue
        raw_key, _, raw_value = line.partition(":")
        current = raw_key.strip()
        parts[current] = [raw_value.strip()]
        line_numbers[current] = n
    options = {k: "\n".join(v) for k, v in parts.items()}
    return options, line_numbers
|
||
def add(self, repo: DebianRepository, default_filename: Optional[bool] = False) -> None: | ||
"""Add a new repository to the system. | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
IIRC the block of code below also needs to be updated to account for the case where
`/etc/apt/sources.list` is just comments telling you "hey, use `ubuntu.sources` instead";
otherwise you will still see the `InvalidSourceError` reported in #135, as `self.load`
fails to find any valid sources in `/etc/apt/sources.list`:
The easiest thing I think we can do here to handle this case, where `/etc/apt/sources.list`
only contains comments, is to first check for the existence of
`/etc/apt/sources.list.d/ubuntu.sources` and then fall back to `/etc/apt/sources.list`
if `*/ubuntu.sources` doesn't exist:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Excellent point, will add a fix like the one you've suggested