diff --git a/Makefile b/Makefile deleted file mode 100644 index d0c3cbf1..00000000 --- a/Makefile +++ /dev/null @@ -1,20 +0,0 @@ -# Minimal makefile for Sphinx documentation -# - -# You can set these variables from the command line, and also -# from the environment for the first two. -SPHINXOPTS ?= -SPHINXBUILD ?= sphinx-build -SOURCEDIR = source -BUILDDIR = build - -# Put it first so that "make" without argument is like "make help". -help: - @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) - -.PHONY: help Makefile - -# Catch-all target: route all unknown targets to Sphinx using the new -# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). -%: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/k8s/Dockerfile b/k8s/Dockerfile deleted file mode 100644 index 9d9379a3..00000000 --- a/k8s/Dockerfile +++ /dev/null @@ -1,27 +0,0 @@ -# This node setup image uses old ScyllaDB version as a base because disk setup scripts inside allowed to -# provide locations of raid, mount etc. Newer images have them hardcoded and they don't match host paths within the container. -# -# Using older version of ScyllaDB image is ok'ish from security point of view, -# because we do run `yum update` as one of the steps so we get all the OS/packages bug fixes. -# -# !!! This setup is considered **deprecated** and will be removed soon in favor of different, safer solution. !!! -FROM docker.io/scylladb/scylla:4.1.6 as base - -# Disable scylla repo, as 4.1 is already EOL. -RUN yum-config-manager --disable scylla --disable scylla-generic --disable scylladb-scylla-3rdparty - -# Install scripts dependencies. -RUN yum -y install epel-release && \ - yum -y clean expire-cache && \ - yum -y update && \ - yum install -y hwloc ethtool python3-yaml python3 python3-devel gcc && \ - yum clean all - -RUN pip3 install pyyaml psutil - -ARG cloud_provider - -COPY "k8s/${cloud_provider}_scylla_create_devices" /opt/scylladb/scylla-machine-image/scylla_create_devices -COPY k8s/scylla_k8s_node_setup /opt/scylladb/scylla-machine-image/scylla_k8s_node_setup - -ENTRYPOINT ["/opt/scylladb/scylla-machine-image/scylla_k8s_node_setup"] diff --git a/k8s/aws_scylla_create_devices b/k8s/aws_scylla_create_devices deleted file mode 100755 index ffa066e6..00000000 --- a/k8s/aws_scylla_create_devices +++ /dev/null @@ -1,194 +0,0 @@ -#!/usr/bin/env python3 -# -# Copyright 2020 ScyllaDB -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import re -import os -import sys -import time -import subprocess -import urllib.request -import urllib.error -from pathlib import Path - - -raid_script = "/opt/scylladb/scripts/scylla_raid_setup" -raid_device = "/dev/md%d" -scylla_root = "" - -def scylla_directory(role): - if role == "all": - return scylla_root - else: - return os.path.join(scylla_root, role) - - -def curl_instance_data(url): - max_retries = 5 - retries = 0 - while True: - try: - req = urllib.request.Request(url) - return urllib.request.urlopen(req).read().decode("utf-8") - except urllib.error.HTTPError: - print("Failed to grab %s..." % url) - time.sleep(5) - retries += 1 - if retries >= max_retries: - raise - - -def find_disk(disks, line): - for disk in disks: - if line.find(disk) == -1: - return False - return True - - -def config_array(disks, role, mdidx): - # Is it already constructed - disks.sort() - md_state_path = Path("/proc/mdstat") - with open(md_state_path) as mdstate: - for l in mdstate: - if find_disk(disks, l): - dev = re.search(r"^md\w+", l).group() - print("Found existing RAID %s, will mount it" % dev) - subprocess.check_call(["mount", "-o", "noatime", - "/dev/%s" % dev, - scylla_directory(role)]) - return - print("RAID Array containing %s not found. Creating..." % str(disks)) - disk_devs = ['/dev/%s' % x for x in disks] - subprocess.run([raid_script, "--raiddev", - raid_device % mdidx, "--disks", ",".join(disk_devs), - "--root", scylla_root, - "--volume-role", role, - "--update-fstab"], check=True) - - -def xenify(devname): - dev = curl_instance_data('http://169.254.169.254/latest/meta-data/block-device-mapping/' + devname) - return dev.replace("sd", "xvd") - - -def device_exists(dev): - return os.path.exists("/dev/%s" % dev) - - -def device_is_busy(dev): - try: - fd = os.open(dev, os.O_RDWR | os.O_EXCL) - os.close(fd) - return False - except OSError: - return True - - -# While testing this, I found the following issue at AWS: -# -# $ ls /dev/nvme* -# /dev/nvme0 /dev/nvme0n1 /dev/nvme1 /dev/nvme1n1 -# -# $ curl http://169.254.169.254/latest/meta-data/block-device-mapping/ -# ami -# ebs2 -# ephemeral0 -# root -# -# As one can see, only one of the ephemeral devices were listed. -# -# I saw this happening only on i3 machines, if EBS were listed before -# ephemeral during creation time. However, in that scenario, I saw it -# happening every time I tested. -# -# More info at: -# https://forums.aws.amazon.com/thread.jspa?threadID=250553 -# -# So for nvme devices, we'll just scan the device list and see what we -# find. Since the goal is to differentiate between ephemeral and -# non-ephemeral anyway, and NVMe are always ephemeral, this is -# acceptable -def get_disk_bundles(): - # define preferred disk roles. We'll see soon if we can respect them. - role = { - "ebs": "unused", - "ephemeral": "all" - } - - # Find disk assignments - devmap = curl_instance_data('http://169.254.169.254/latest/meta-data/block-device-mapping/') - typemap = {} - devname = re.compile("^\D+") - nvme_re = re.compile(r"nvme\d+n\d+$") - nvmes_present = list(filter(nvme_re.match, os.listdir("/dev"))) - nvmes_free = [nvme for nvme in nvmes_present if not device_is_busy(os.path.join('/dev/', nvme))] - - if nvmes_free: - typemap["ephemeral"] = nvmes_free - - for dev in devmap.splitlines(): - if dev == "ami" or dev == "root": - continue - - t = devname.match(dev).group() - if role[t] == "unused": - continue - - if t == "ephemeral" and nvmes_present: - continue - - if t not in typemap: - typemap[t] = [] - if not device_exists(xenify(dev)): - continue - typemap[t] += [xenify(dev)] - - # One of the desired types not found: The other type has it all - if "ebs" not in typemap and "ephemeral" not in typemap: - sys.stderr.write("No disks found\n") - sys.exit(0) - elif "ebs" not in typemap: - role["ephemeral"] = "all" - elif "ephemeral" not in typemap: - role["ebs"] = "all" - - # Could happen even if properly invoked through ds2 if one of the - # types is not present, and the other is set to "unused" - if role["ebs"] == role["ephemeral"]: - err_msg = "Exception when parsing config. Both EBS and ephemeral are set to the same role (%s)" - raise Exception(err_msg % (role["ebs"])) - - # If one type configured for all, the other for a specified role, and both present: - # That's valid and sane: respect that and mount one on top of the other. We just need - # make sure that the root is mounted first. - order = list(typemap.keys()) - order.sort() - - mdidx = 0 - for t in order: - config_array(typemap[t], role[t], mdidx) - mdidx += 1 - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description='Disk creation script for Scylla.') - parser.add_argument('--scylla-data-root', dest='scylla_data_root', action='store', - help='location of Scylla root data directory', default="/var/lib/scylla") - args = parser.parse_args() - - scylla_root = args.scylla_data_root - - get_disk_bundles() diff --git a/k8s/build_image.sh b/k8s/build_image.sh deleted file mode 100755 index cb83f319..00000000 --- a/k8s/build_image.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash -e -# -# Copyright 2020 ScyllaDB -# -# SPDX-License-Identifier: Apache-2.0 - -CLOUD_PROVIDER= - -print_usage() { - echo "build_image.sh -c [aws]" - echo " -c cloud provider" - exit 1 -} -while getopts c: option -do - case "${option}" - in - c) CLOUD_PROVIDER=${OPTARG};; - *) print_usage;; - esac -done - -if [[ ! -e k8s/build_image.sh ]]; then - echo "run build_image.sh in top of scylla-machine-image dir" - exit 1 -fi - -echo "Building in $PWD..." - -VERSION="k8s-${CLOUD_PROVIDER}-node-setup-0.0.2" -IMAGE_REF="scylladb/scylla-machine-image:${VERSION}" - -docker build -f k8s/Dockerfile --build-arg "cloud_provider=${CLOUD_PROVIDER}" -t "${IMAGE_REF}" . diff --git a/k8s/scylla_k8s_node_setup b/k8s/scylla_k8s_node_setup deleted file mode 100755 index 9116ac83..00000000 --- a/k8s/scylla_k8s_node_setup +++ /dev/null @@ -1,73 +0,0 @@ -#!/usr/bin/python3 -# -# Copyright 2020 ScyllaDB -# -# SPDX-License-Identifier: Apache-2.0 - -import os -import shutil -import sys -import pathlib -import signal -import argparse -from subprocess import check_output -sys.path.append('/opt/scylladb/scripts') -from scylla_util import * - -def copytree(src, dst, symlinks=False, ignore=None): - for item in os.listdir(src): - s = os.path.join(src, item) - d = os.path.join(dst, item) - if os.path.isdir(s): - shutil.copytree(s, d, symlinks, ignore) - else: - shutil.copy2(s, d) - -def get_pid(name): - return int(check_output(["pidof","-s",name])) - -if __name__ == '__main__': - root_disk = os.environ.get('ROOT_DISK', "/mnt/raid-disks/disk0") - scylladconf_mount = os.environ.get('SCYLLAD_CONF_MOUNT', '/mnt/scylla.d/') - - parser = argparse.ArgumentParser(description='Scylla setup for k8s') - parser.add_argument('--all', dest='all', action='store_true', - help='Setup everything, it has the same effect as setting each parameter individually') - parser.add_argument('--install-dependencies', dest='install_dependencies', action='store_true', - help='installs Scylla dependencies') - parser.add_argument('--setup-disks', dest='setup_disks', action='store_true', - help='format disks') - parser.add_argument('--setup-network', dest='setup_network', action='store_true', - help='setup network iface') - parser.add_argument('--run-io', dest='run_io', action='store_true', - help='run io tuning') - args = parser.parse_args() - - if not args.all and not args.install_dependencies and not args.setup_disks and not args.setup_network and not args.run_io: - parser.print_help() - os.exit(1) - - if args.all or args.install_dependencies: - run('/opt/scylladb/scripts/scylla_setup --no-coredump-setup --no-sysconfig-setup --no-raid-setup --no-io-setup --no-bootparam-setup --no-ec2-check --no-cpuscaling-setup --no-kernel-check --no-verify-package --no-enable-service --no-selinux-setup --no-version-check --no-node-exporter') - - if args.all or args.setup_disks: - # setup XFS mount - run('/opt/scylladb/scylla-machine-image/scylla_create_devices --scylla-data-root {}'.format(root_disk)) - - if args.all or args.setup_network: - run('/opt/scylladb/scripts/perftune.py --nic eth0 --mode sq --tune net') - - # Notify irqbalance about config change - os.kill(get_pid("irqbalance"), signal.SIGHUP) - - if args.all or args.run_io: - run('/opt/scylladb/scripts/scylla_io_setup') - - copytree('/etc/scylla.d', scylladconf_mount) - - pathlib.Path('/etc/scylla/machine_image_configured').touch() - - print("Setup done!") - - # infinite sleep - signal.pause() diff --git a/source/conf.py b/source/conf.py deleted file mode 100644 index 8d47ac7f..00000000 --- a/source/conf.py +++ /dev/null @@ -1,51 +0,0 @@ -# Configuration file for the Sphinx documentation builder. -# -# This file only contains a selection of the most common options. For a full -# list see the documentation: -# https://www.sphinx-doc.org/en/master/usage/configuration.html - -# -- Path setup -------------------------------------------------------------- - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -# -# import os -# import sys -# sys.path.insert(0, os.path.abspath('.')) - - -# -- Project information ----------------------------------------------------- - -project = 'scylla-machine-image' -copyright = '2020, ScyllaDB' -author = 'ScyllaDB' - - -# -- General configuration --------------------------------------------------- - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. -extensions = ['sphinxjsondomain'] - -# Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -# This pattern also affects html_static_path and html_extra_path. -exclude_patterns = [] - - -# -- Options for HTML output ------------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -# -html_theme = 'alabaster' - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] diff --git a/source/contents.rst b/source/contents.rst deleted file mode 100644 index e852eea9..00000000 --- a/source/contents.rst +++ /dev/null @@ -1,20 +0,0 @@ -.. scylla-machine-image documentation master file, created by - sphinx-quickstart on Thu Apr 2 10:13:11 2020. - You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. - -Welcome to scylla-machine-image's documentation! -================================================ - -.. toctree:: - :maxdepth: 2 - :caption: Contents: - - user_data_v3 - -Indices and tables -================== - -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` diff --git a/source/user_data_v3.rst b/source/user_data_v3.rst deleted file mode 100644 index 769e2b50..00000000 --- a/source/user_data_v3.rst +++ /dev/null @@ -1,184 +0,0 @@ -Scylla AMI user-data Format v3 -============================== - -Scylla AMI user-data should be passed as a json object, as described below - -see AWS docs for how to pass user-data into ec2 instances: -https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-add-user-data.html - ------ - -.. json:object:: EC2 User-Data - - User Data that can pass when create EC2 instances - - :property scylla_yaml: Mapping of all fields that would pass down to scylla.yaml configuration file - :proptype scylla_yaml: :json:object:`Scylla YAML` - - :property scylla_startup_args: embedded information about the user that created the issue (NOT YET IMPLEMENTED) - :proptype scylla_startup_args: list - :options scylla_startup_args: default='[]' - - :property developer_mode: Enables developer mode - :proptype developer_mode: boolean - :options developer_mode: default='false' - - :property post_configuration_script: A script to run once AMI first configuration is finished, can be a string encoded in base64. - :proptype post_configuration_script: string - :options post_configuration_script: default='' - - :property post_configuration_script_timeout: Time in secoands to limit the `post_configuration_script` - :proptype post_configuration_script_timeout: int - :options post_configuration_script_timeout: default='600' - - :property start_scylla_on_first_boot: If true, scylla-server would boot at AMI boot - :proptype start_scylla_on_first_boot: boolean - :options start_scylla_on_first_boot: default='true' - - -.. json:object:: Scylla YAML - - All fields that would pass down to scylla.yaml configuration file - see https://docs.scylladb.com/operating-scylla/scylla-yaml/ for all the possible configuration available - listed here only the one get defaults scylla AMI - - :property cluster_name: Name of the cluster - :proptype cluster_name: string - :options cluster_name: default=[generated name that would work for only one node cluster] - - :property experimental: To enable all experimental features add to the scylla.yaml - :proptype experimental: boolean - :options experimental: default='false' - - :property auto_bootstrap: Enable auto bootstrap - :proptype experimental: boolean - :options experimental: default='true' - - :property listen_address: Defaults to ec2 instance private ip - :proptype listen_address: string - - :property broadcast_rpc_address: Defaults to ec2 instance private ip - :proptype broadcast_rpc_address: string - - :property endpoint_snitch: Defaults to 'org.apache.cassandra.locator.Ec2Snitch' - :proptype endpoint_snitch: string - - :property rpc_address: Defaults to '0.0.0.0' - :proptype rpc_address: string - - :property seed_provider: Defaults to ec2 instance private ip - :proptype seed_provider: mapping - - - -Example -------- - -Spinning a new node connecting to "10.0.219.209" as a seed, and installing cloud-init-cfn package at first boot. - -using json -++++++++++ -.. code-block:: json - - { - "scylla_yaml": { - "cluster_name": "test-cluster", - "experimental": true, - "seed_provider": [{"class_name": "org.apache.cassandra.locator.SimpleSeedProvider", - "parameters": [{"seeds": "10.0.219.209"}]}], - }, - "post_configuration_script": "#! /bin/bash\nyum install cloud-init-cfn", - "start_scylla_on_first_boot": true - } - -using yaml -++++++++++ -.. code-block:: yaml - - scylla_yaml: - cluster_name: test-cluster - experimental: true - seed_provider: - - class_name: org.apache.cassandra.locator.SimpleSeedProvider - parameters: - - seeds: 10.0.219.209 - post_configuration_script: "#! /bin/bash\nyum install cloud-init-cfn" - start_scylla_on_first_boot: true - -using mimemultipart -++++++++++++++++++++ - -If other feature of cloud-init are needed, one can use mimemultipart, and pass -a json/yaml with `x-scylla/yaml` or `x-scylla/json` - -more information on cloud-init multipart user-data: - -https://cloudinit.readthedocs.io/en/latest/topics/format.html#mime-multi-part-archive - -.. code-block:: mime - - Content-Type: multipart/mixed; boundary="===============5438789820677534874==" - MIME-Version: 1.0 - - --===============5438789820677534874== - Content-Type: x-scylla/yaml - MIME-Version: 1.0 - Content-Disposition: attachment; filename="scylla_machine_image.yaml" - - scylla_yaml: - cluster_name: test-cluster - experimental: true - seed_provider: - - class_name: org.apache.cassandra.locator.SimpleSeedProvider - parameters: - - seeds: 10.0.219.209 - post_configuration_script: "#! /bin/bash\nyum install cloud-init-cfn" - start_scylla_on_first_boot: true - - --===============5438789820677534874== - Content-Type: text/cloud-config; charset="us-ascii" - MIME-Version: 1.0 - Content-Transfer-Encoding: 7bit - Content-Disposition: attachment; filename="cloud-config.txt" - - #cloud-config - cloud_final_modules: - - [scripts-user, always] - - --===============5438789820677534874==-- - -example of creating the multipart message by python code: - -.. code-block:: python - import json - from email.mime.base import MIMEBase - from email.mime.multipart import MIMEMultipart - - msg = MIMEMultipart() - - scylla_image_configuration = dict( - scylla_yaml=dict( - cluster_name="test_cluster", - listen_address="10.23.20.1", - broadcast_rpc_address="10.23.20.1", - seed_provider=[{ - "class_name": "org.apache.cassandra.locator.SimpleSeedProvider", - "parameters": [{"seeds": "10.23.20.1"}]}], - ) - ) - part = MIMEBase('x-scylla', 'json') - part.set_payload(json.dumps(scylla_image_configuration, indent=4, sort_keys=True)) - part.add_header('Content-Disposition', 'attachment; filename="scylla_machine_image.json"') - msg.attach(part) - - cloud_config = """ - #cloud-config - cloud_final_modules: - - [scripts-user, always] - """ - part = MIMEBase('text', 'cloud-config') - part.set_payload(cloud_config) - part.add_header('Content-Disposition', 'attachment; filename="cloud-config.txt"') - msg.attach(part) - - print(msg) \ No newline at end of file