Skip to content

Commit

Permalink
Merge pull request cfengine#207 from cfengine/pr-fix_readme
Browse files Browse the repository at this point in the history
Replace packages with modules in README.md

Signed-off-by: jakub-nt <[email protected]>
  • Loading branch information
jakub-nt committed Oct 17, 2024
1 parent ce28c1f commit 4139b4e
Show file tree
Hide file tree
Showing 11 changed files with 435 additions and 1 deletion.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ Here are the basic commands to set up a repo, add dependencies, build and deploy
cfbs init
```

### List or search available packages
### List or search available modules

```
cfbs search
Expand Down
6 changes: 6 additions & 0 deletions cfbs/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@
from cfbs.git_magic import Result, commit_after_command, git_commit_maybe_prompt
from cfbs.prompts import YES_NO_CHOICES, prompt_user
from cfbs.module import Module, is_module_added_manually
from masterfiles.generate_release_information import generate_release_information


class InputDataUpdateFailed(Exception):
Expand Down Expand Up @@ -1204,3 +1205,8 @@ def get_input_command(name, outfile):
log.error("Failed to write json: %s" % e)
return 1
return 0


@cfbs_command("generate-release-information")
def generate_release_information_command():
    """Run release-information generation and report success.

    Returns:
        0 once `generate_release_information()` has returned, so the caller
        always receives an integer exit status like the other commands.
    """
    generate_release_information()
    return 0
3 changes: 3 additions & 0 deletions cfbs/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,9 @@ def main() -> int:
if args.command in ("info", "show"):
return commands.info_command(args.args)

if args.command == "generate-release-information":
return commands.generate_release_information_command()

if not is_cfbs_repo():
user_error("This is not a cfbs repo, to get started, type: cfbs init")

Expand Down
20 changes: 20 additions & 0 deletions cfbs/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,14 @@ def is_cfbs_repo() -> bool:
return os.path.isfile(cfbs_filename())


def immediate_subdirectories(path):
    """Return the names of the directories located directly inside `path`."""
    names = []
    for entry in os.scandir(path):
        if entry.is_dir():
            names.append(entry.name)
    return names


def immediate_files(path):
    """Return the names of the non-directory entries located directly inside `path`."""
    names = []
    for entry in os.scandir(path):
        if entry.is_dir():
            continue
        names.append(entry.name)
    return names


def path_append(dir, subdir):
    """Return the absolute, user-expanded form of `dir`, joined with `subdir` when one is given.

    A falsy `subdir` (e.g. `None` or an empty string) yields just the
    normalized directory path.
    """
    base = os.path.abspath(os.path.expanduser(dir))
    if not subdir:
        return base
    return os.path.join(base, subdir)
Expand Down Expand Up @@ -278,6 +286,18 @@ def cfbs_dir(append=None) -> str:
return os.path.join(directory, append)


def string_sha256(input):
    """Return the hexadecimal SHA-256 digest of `input` encoded as UTF-8."""
    digest = hashlib.sha256()
    digest.update(input.encode("utf-8"))
    return digest.hexdigest()


def file_sha256(file):
    """Return the hexadecimal SHA-256 digest of the contents of the file at `file`.

    The file is opened in binary mode inside a `with` block so the handle is
    always closed, and it is hashed in fixed-size chunks so that large files
    are never held in memory all at once.
    """
    h = hashlib.sha256()

    with open(file, "rb") as f:
        # iter() with a sentinel stops at the empty bytes object read at EOF
        for chunk in iter(lambda: f.read(65536), b""):
            h.update(chunk)

    return h.hexdigest()


class FetchError(Exception):
    """Exception type with no behavior beyond `Exception`.

    NOTE(review): presumably raised when fetching a resource fails; confirm
    against the raise sites, which are not visible here.
    """

Expand Down
54 changes: 54 additions & 0 deletions masterfiles/analyze.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# TODO merge this with ENT-12099 branch cfbs analyze.py
import os

from cfbs.utils import file_sha256

# Path components that mark a path as ignored: a path is ignored if and only
# if it contains a component (a single file or directory name) from this list.
# Each element must be exactly one component; directory entries end with '/',
# file entries do not.
# TODO: the list is not yet applied by `versions_checksums_files` below.
IGNORED_PATH_COMPONENTS = [".git/", ".gitignore", ".gitattributes"]


def initialize_vcf():
    """Return fresh, empty `(versions, checksums, files)` dictionaries.

    Each dictionary holds a single top-level key ("versions", "checksums" or
    "files") mapped to an empty dict.
    """
    return {"versions": {}}, {"checksums": {}}, {"files": {}}


def versions_checksums_files(
    files_dir_path, version, versions_dict, checksums_dict, files_dict
):
    """Record every file under `files_dir_path` in the three lookup dictionaries.

    For each file found by walking `files_dir_path`, its SHA-256 checksum and
    its path relative to `files_dir_path` are stored as:

    * `versions_dict["versions"][version]["files"][relpath] = checksum`
    * `checksums_dict["checksums"][checksum]` gains `{"file", "version"}`
    * `files_dict["files"][relpath]` gains `{"checksum", "version"}`

    The dictionaries are updated in place and also returned as a tuple.
    """
    for root, _dirs, filenames in os.walk(files_dir_path):
        for filename in filenames:
            path_on_disk = os.path.join(root, filename)
            relpath = os.path.relpath(path_on_disk, files_dir_path)
            sha = file_sha256(path_on_disk)

            # version -> {"files": {relpath: checksum}}
            version_entry = versions_dict["versions"].setdefault(version, {})
            version_entry.setdefault("files", {})[relpath] = sha

            # checksum -> every (file, version) pair that has it
            checksums_dict["checksums"].setdefault(sha, []).append(
                {
                    "file": relpath,
                    "version": version,
                }
            )

            # relpath -> every (checksum, version) pair it appeared with
            files_dict["files"].setdefault(relpath, []).append(
                {
                    "checksum": sha,
                    "version": version,
                }
            )

    return versions_dict, checksums_dict, files_dict
20 changes: 20 additions & 0 deletions masterfiles/check_download_matches_git.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# check that the downloadable files match the git files, mitigating a build system supply-chain attack
import os
import dictdiffer

from cfbs.utils import read_json


def check_download_matches_git(versions):
    """Write the differences between downloaded and git file checksums, per version.

    Reads `versions.json` and `versions-git.json` from the current directory
    and, for every entry of `versions`, compares the two "files" mappings
    with `dictdiffer.diff`. When a version has differences, all of them are
    written, one per line, to `differences/difference-<version>.txt`.
    Versions without differences produce no file.
    """
    download_versions_dict = read_json("versions.json")
    git_versions_dict = read_json("versions-git.json")

    os.makedirs("differences", exist_ok=True)

    for version in versions:
        download_version_dict = download_versions_dict["versions"][version]["files"]
        git_version_dict = git_versions_dict["versions"][version]["files"]

        diffs = list(dictdiffer.diff(download_version_dict, git_version_dict))
        if not diffs:
            continue

        # Open the report once per version: reopening it in "w" mode for each
        # difference would truncate the file and keep only the last entry.
        with open("differences/difference-" + version + ".txt", "w") as f:
            for diff in diffs:
                print(diff, file=f)
34 changes: 34 additions & 0 deletions masterfiles/check_tarball_checksums.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
from cfbs.utils import file_sha256, immediate_files


def check_tarball_checksums(dir_path, downloaded_versions, reported_checksums):
    """Compare each downloaded tarball's SHA-256 with its reported checksum.

    Args:
        dir_path: directory containing one subdirectory per version; it must
            support the `/` operator (e.g. `pathlib.Path`).
        downloaded_versions: iterable of version strings to check.
        reported_checksums: mapping of version to expected SHA-256 hex digest.

    Returns:
        True if every compared checksum matches, False otherwise. Mismatches
        are printed to standard output.
    """
    does_match = True

    for version in downloaded_versions:
        print(version)

        # Skip before touching the filesystem: hashing these tarballs would
        # be wasted work since they are never compared.
        if version in ("3.10.0", "3.9.2"):
            # 3.10.0 lists a .tar.gz, not a .pkg.tar.gz
            # 3.9.2 lists no masterfiles
            continue

        version_path = dir_path / version

        versions_files = immediate_files(version_path)
        # the tarball should be the only file in the version's directory
        tarball_name = versions_files[0]

        tarball_path = version_path / tarball_name

        tarball_checksum = file_sha256(tarball_path)

        reported_checksum = reported_checksums[version]

        if tarball_checksum != reported_checksum:
            does_match = False
            print("* checksum difference:")
            print(version)
            print(tarball_checksum)
            print(reported_checksum)

    return does_match
159 changes: 159 additions & 0 deletions masterfiles/download_all_versions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
from pathlib import Path
from requests_cache import CachedSession
from shutil import unpack_archive
from urllib.request import urlretrieve

DOWNLOAD = True
DEBUG = False

ENTERPRISE_URL = "https://cfengine.com/release-data/enterprise/releases.json"
COMMUNITY_URL = "https://cfengine.com/release-data/community/releases.json"


def print_debug(*args, **kwargs):
    """Forward the arguments to `print`, but only when `DEBUG` is truthy."""
    if not DEBUG:
        return
    print(*args, **kwargs)


def check_url_downloadable(session, url):
    """Return whether a HEAD request for `url` indicates a downloadable file.

    The starting answer is whether the `Content-Disposition` header contains
    "attachment". The content type then overrides it: a type containing
    "xml" means not downloadable, one containing "gzip" means downloadable.
    A missing content-type header leaves the starting answer unchanged.

    Args:
        session: object whose `head(url)` returns a response with a
            `headers` mapping.
        url: the URL to probe.
    """
    headers = session.head(url).headers
    downloadable = "attachment" in headers.get("Content-Disposition", "")

    # The header may be absent; treat that as an empty content type rather
    # than calling .lower() on None.
    content_type = (headers.get("content-type") or "").lower()
    if "xml" in content_type:
        downloadable = False
    elif "gzip" in content_type:
        downloadable = True

    return downloadable


def check_analogous_urls(session, version):
    """Probe the known alternative masterfiles tarball URLs for `version`.

    The "tarballs" URL is checked first and returned if it is downloadable.
    Otherwise four Enterprise URL variants are probed and reported through
    `print_debug` only; in that case the function returns None.
    """
    tarballs_url = (
        "https://cfengine-package-repos.s3.amazonaws.com/tarballs/cfengine-masterfiles-"
        + version
        + ".pkg.tar.gz"
    )

    tarballs_ok = check_url_downloadable(session, tarballs_url)
    print_debug("Checking tarballs URL: ", tarballs_ok)
    print_debug(tarballs_url)
    if tarballs_ok:
        return tarballs_url

    enterprise_prefix = (
        "https://cfengine-package-repos.s3.amazonaws.com/enterprise/Enterprise-"
        + version
        + "/misc/cfengine-masterfiles-"
        + version
    )

    # (debug label, URL suffix) for each Enterprise variant, probed in order
    enterprise_variants = (
        ("Checking enterprise-0 URL: ", ".pkg.tar.gz"),
        ("Checking enterprise-1 URL: ", "-1.pkg.tar.gz"),
        ("Checking enterprise-2 URL: ", "-2.pkg.tar.gz"),
        ("Checking enterprise-3 URL: ", "-3.pkg.tar.gz"),
    )
    for label, suffix in enterprise_variants:
        print_debug(label, check_url_downloadable(session, enterprise_prefix + suffix))

    return None


# TODO
# def download_all_versions_community():
# response = session.get(COMMUNITY_URL)
# # "masterfiles is at a different index" in 3.10.1 happens only for Enterprise, not Community


def download_all_versions_enterprise():
    """Collect, and optionally download, the Enterprise masterfiles tarballs.

    The release index at `ENTERPRISE_URL` is fetched, then each release's
    own JSON document is inspected for a masterfiles download URL and its
    reported SHA-256. When `DOWNLOAD` is truthy, every collected tarball is
    saved to `./enterprise/<version>/` and unpacked into
    `./enterprise/<version>/tarball`.

    Returns:
        A tuple `(downloaded_versions, reported_checksums)`: the list of
        versions that were downloaded (empty when `DOWNLOAD` is falsy) and a
        mapping of version to the checksum reported in the release data.
    """
    http = CachedSession()
    releases_index = http.get(ENTERPRISE_URL).json()

    urls_dict = {}
    reported_checksums = {}

    for release in releases_index["releases"]:
        version = release["version"]
        print_debug(version)
        release_url = release["URL"]
        print_debug(release_url)

        artifacts = http.get(release_url).json()["artifacts"]

        download_url = None
        if "Additional Assets" not in artifacts:
            print_debug("Warning: no Additional Assets!")
            # happens for 3.9.0b1, 3.8.0b1, 3.6.1, 3.6.0
            if DEBUG:
                check_analogous_urls(http, version)
        elif version == "3.10.0":
            # for 3.10.0, for some reason, the masterfiles download link points
            # to the .tar.gz tarball rather than the .pkg.tar.gz tarball;
            # download the .pkg.tar.gz from a hidden analogous URL instead
            download_url = "https://cfengine-package-repos.s3.amazonaws.com/tarballs/cfengine-masterfiles-3.10.0.pkg.tar.gz"
        else:
            # there's precisely one version (3.10.1) for which masterfiles is
            # at a different index
            asset_index = 1 if version == "3.10.1" else 0
            asset = artifacts["Additional Assets"][asset_index]

            if asset["Title"] == "Masterfiles ready-to-install tarball":
                download_url = asset["URL"]
                reported_checksums[version] = asset["SHA256"]
            else:
                print_debug("Warning: not masterfiles!")
                # happens for 3.10.1, 3.9.2, 3.9.0, 3.8.2, 3.8.1, 3.8.0, 3.6.2--3.7.4
                if DEBUG:
                    check_analogous_urls(http, version)
                # 3.10.1: see above
                # 3.9.2: no masterfiles listed, but an analogous hidden URL exists
                # 3.9.0 and others: no masterfiles listed, and analogous hidden
                # URLs seemingly do not exist
                if version == "3.9.2":
                    download_url = "https://cfengine-package-repos.s3.amazonaws.com/tarballs/cfengine-masterfiles-3.9.2.pkg.tar.gz"

        print_debug(download_url)
        if download_url is not None:
            urls_dict[version] = download_url

    downloaded_versions = []
    if not DOWNLOAD:
        return downloaded_versions, reported_checksums

    root_path = Path("./enterprise")
    root_path.mkdir(exist_ok=True)

    for version, url in urls_dict.items():
        # ignore master and .x versions
        if url.startswith("http://buildcache"):
            continue

        downloaded_versions.append(version)
        print(url)

        version_path = root_path / version
        version_path.mkdir(exist_ok=True)

        tarball_path = version_path / url.split("/")[-1]
        urlretrieve(url, tarball_path)

        unpack_archive(tarball_path, version_path / "tarball")

    # for local verification of the reported (Enterprise) (.pkg.tar.gz) checksums
    return downloaded_versions, reported_checksums
33 changes: 33 additions & 0 deletions masterfiles/generate_release_information.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# TODO document `cfbs generate-release-information`
# this script uses several extra deps compared to the rest of cfbs
import sys
from pathlib import Path

from masterfiles.download_all_versions import download_all_versions_enterprise
from masterfiles.check_tarball_checksums import check_tarball_checksums
from masterfiles.generate_vcf_download import generate_vcf_download
from masterfiles.generate_vcf_git_checkout import generate_vcf_git_checkout
from masterfiles.check_download_matches_git import check_download_matches_git

ENTERPRISE_PATH = Path("./enterprise")


def generate_release_information():
    """Download the Enterprise masterfiles releases and generate their release data.

    Steps, in order: download all versions, verify the tarball checksums
    (exiting with status 1 on any mismatch), generate the version/checksum/
    file data for the downloads and for the git checkouts, then record the
    differences between the two.
    """
    # only needs to be done once (although changes could happen afterwards),
    # and silly to do if already have access to hosted files
    downloaded_versions, reported_checksums = download_all_versions_enterprise()
    # TODO: the Community equivalent would be download_all_versions_community()

    # Enterprise 3.9.2 is downloaded but there is no reported checksum, so both args are necessary
    checksums_ok = check_tarball_checksums(
        ENTERPRISE_PATH, downloaded_versions, reported_checksums
    )
    if not checksums_ok:
        print("Checksums differ!")
        sys.exit(1)
    print("Every checksum matches")

    generate_vcf_download(ENTERPRISE_PATH, downloaded_versions)
    generate_vcf_git_checkout(downloaded_versions)

    check_download_matches_git(downloaded_versions)
    # TODO automatic analysis of the difference-*.txts
21 changes: 21 additions & 0 deletions masterfiles/generate_vcf_download.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from cfbs.utils import write_json
from masterfiles.analyze import initialize_vcf, versions_checksums_files


def generate_vcf_download(dir_path, downloaded_versions):
    """Write `versions.json`, `checksums.json` and `files.json` for the downloaded tarballs.

    `dir_path`: the path of the directory containing masterfiles versions
    subdirectories in the form `dir_path/x.y.z/tarball/`.
    The `tarball` folder should contain the `masterfiles` folder (older
    tarballs also have a `modules` folder alongside the `masterfiles` folder).
    """
    # (versions_dict, checksums_dict, files_dict), threaded through every version
    vcf = initialize_vcf()

    for version in downloaded_versions:
        unpacked_dir = dir_path / version / "tarball"
        vcf = versions_checksums_files(unpacked_dir, version, *vcf)

    versions_dict, checksums_dict, files_dict = vcf
    write_json("versions.json", versions_dict)
    write_json("checksums.json", checksums_dict)
    write_json("files.json", files_dict)
Loading

0 comments on commit 4139b4e

Please sign in to comment.