-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
7 changed files
with
453 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
FROM python:3.10 | ||
|
||
RUN mkdir -p /content/src /content/bin | ||
WORKDIR /content | ||
|
||
COPY requirements.txt . | ||
RUN pip install --no-cache-dir -r requirements.txt | ||
|
||
ENV PATH="$PATH:/content/bin" | ||
|
||
# Copy in build dependencies only since the build will take a while. | ||
COPY ./bin/ ./bin | ||
COPY ./src/ ./src |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
# Dataset: K8s Instructions | ||
|
||
A dataset that has instructions to write K8s YAML files | ||
|
||
## Usage | ||
|
||
Build. | ||
|
||
```sh | ||
docker build -t dataset-k8s-instructions . | ||
``` | ||
|
||
Explore and develop with a Jupyter Lab. | ||
|
||
```sh | ||
# Run a Jupyter Notebook. | ||
docker run -it -e LOAD_DATA_PATH=/data/all.jsonl -v $(pwd)/data:/data -v $(pwd)/src:/dataset/src -p 8888:8888 dataset-k8s-instructions notebook.sh | ||
|
||
# In another terminal: Open browser. | ||
open http://localhost:8888 | ||
|
||
# Now you can edit the contents of `src/`. | ||
|
||
# Re-build the container if you changed anything. | ||
docker build -t dataset-k8s-instructions . | ||
``` | ||
|
||
Fetch data. | ||
|
||
```sh | ||
# Run training job. | ||
docker run -e LOAD_DATA_PATH=/data/all.jsonl -v $(pwd)/data:/data -v $(pwd)/logs:/dataset/logs dataset-k8s-instructions load.sh | ||
|
||
head data/* | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
#!/usr/bin/env sh | ||
|
||
set -xe | ||
|
||
jupyter nbconvert --debug --to notebook --execute /content/src/load.ipynb --output /content/logs/load.ipynb |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
#!/usr/bin/env sh | ||
|
||
set -xe | ||
|
||
jupyter lab --allow-root --ip=0.0.0.0 --NotebookApp.token='' --notebook-dir='/dataset' |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
jupyterlab | ||
ipywidgets | ||
jupytext |
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"attachments": {}, | ||
"cell_type": "markdown", | ||
"id": "ead0b7e2", | ||
"metadata": {}, | ||
"source": [ | ||
"Download K8s instructions dataset and convert to LLM-friendly JSONL format." | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"id": "3397f847", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import urllib.request\n", | ||
"import json\n", | ||
"import os" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 3, | ||
"id": "33c8a1ed", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"data_url = \"https://huggingface.co/datasets/substratusai/k8s-instructions/raw/main/k8s-instructions.jsonl\"\n", | ||
"transformed_file_path = \"/content/data/all.jsonl\"" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 4, | ||
"id": "ce5e89c4", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"('k8s-instructions.jsonl', <http.client.HTTPMessage at 0x7f28adff1150>)" | ||
] | ||
}, | ||
"execution_count": 4, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"urllib.request.urlretrieve(data_url, transformed_file_path)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 6, | ||
"id": "ef057cc4-c403-4de5-a6cd-19adb2d086f7", | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"{\"prompt\": \"Write YAML that defines a Kubernetes Deployment named \\\"iis\\\" with 3 replicas. The Deployment selects pods based on the label \\\"app=iis\\\". The pod template includes a container named \\\"iis\\\" that uses the \\\"microsoft/iis\\\" image and exposes port 80 within the container. Additionally, it includes an annotation specifying the isolation type as \\\"hyperv\\\" for experimental purposes.\", \"completion\": \"```yaml\\napiVersion: apps/v1\\nkind: Deployment\\nmetadata:\\n name: iis\\nspec:\\n selector:\\n matchLabels:\\n app: iis\\n replicas: 3\\n template:\\n metadata:\\n labels:\\n app: iis\\n annotations:\\n experimental.windows.kubernetes.io/isolation-type: hyperv\\n spec:\\n containers:\\n - name: iis\\n image: microsoft/iis\\n ports:\\n - containerPort: 80\\n\\n\\n```\"}\n", | ||
"{\"prompt\": \"Write YAML that defines a Kubernetes Pod resource named \\\"iis\\\" with the image set to \\\"microsoft/iis:windowsservercore-1709\\\" and the container listening on port 80. The Pod also includes a nodeSelector that ensures the Pod is scheduled on a node with the \\\"kubernetes.io/os\\\" label set to \\\"windows\\\".\", \"completion\": \"```yaml\\napiVersion: v1\\nkind: Pod\\nmetadata:\\n name: iis\\n labels:\\n name: iis\\nspec:\\n containers:\\n - name: iis\\n image: microsoft/iis:windowsservercore-1709\\n ports:\\n - containerPort: 80\\n nodeSelector:\\n \\\"kubernetes.io/os\\\": windows\\n\\n```\"}\n", | ||
"{\"prompt\": \"Write YAML that describes a Deployment object for an IIS application in Kubernetes. It specifies that there should be 3 replicas of the deployment and the selector should match the labels with the \\\"app\\\" value set to \\\"iis\\\". The template within the deployment describes the containers to be deployed, with the name set to \\\"iis\\\" and the image being \\\"microsoft/iis\\\". It also sets resource limits for memory to \\\"128Mi\\\" and CPU to 2, and specifies that port 80 should be exposed within the container.\", \"completion\": \"```yaml\\napiVersion: apps/v1\\nkind: Deployment\\nmetadata:\\n name: iis\\nspec:\\n replicas: 3\\n selector:\\n matchLabels:\\n app: iis\\n template:\\n metadata:\\n labels:\\n app: iis\\n spec:\\n containers:\\n - name: iis\\n image: microsoft/iis\\n resources:\\n limits:\\n memory: \\\"128Mi\\\"\\n cpu: 2\\n ports:\\n - containerPort: 80\\n\\n\\n```\"}\n", | ||
"{\"prompt\": \"Write YAML that creates a DaemonSet named \\\"my-daemonset\\\" with labels \\\"app: foo\\\". The DaemonSet will only be scheduled on nodes that have the label \\\"kubernetes.io/os: windows\\\". The DaemonSet will run a container named \\\"foo\\\" using the image \\\"microsoft/windowsservercore:1709\\\".\", \"completion\": \"```yaml\\napiVersion: apps/v1\\nkind: DaemonSet\\nmetadata:\\n name: my-daemonset\\n labels:\\n app: foo\\nspec:\\n selector:\\n matchLabels:\\n app: foo\\n template:\\n metadata:\\n labels:\\n app: foo\\n spec:\\n containers:\\n - name: foo\\n image: microsoft/windowsservercore:1709\\n nodeSelector:\\n kubernetes.io/os: windows\\n\\n\\n```\"}\n", | ||
"{\"prompt\": \"Write YAML that creates a Secret named \\\"mysecret\\\" of type Opaque with the data stored in base64 encoding for the \\\"username\\\" and \\\"password\\\" keys. \\n\\nThen write YAML that creates a Pod named \\\"my-secret-pod\\\" with a container named \\\"my-secret-pod\\\" running the \\\"microsoft/windowsservercore:1709\\\" image. The container gets its USERNAME and PASSWORD environment variables from the \\\"mysecret\\\" Secret by referencing the respective keys. The Pod is scheduled on a node with the label \\\"kubernetes.io/os\\\" set to \\\"windows\\\".\", \"completion\": \"```yaml\\napiVersion: v1\\nkind: Secret\\nmetadata:\\n name: mysecret\\ntype: Opaque\\ndata:\\n username: YWRtaW4=\\n password: MWYyZDFlMmU2N2Rm\\n\\n---\\n\\napiVersion: v1\\nkind: Pod\\nmetadata:\\n name: my-secret-pod\\nspec:\\n containers:\\n - name: my-secret-pod\\n image: microsoft/windowsservercore:1709\\n env:\\n - name: USERNAME\\n valueFrom:\\n secretKeyRef:\\n name: mysecret\\n key: username\\n - name: PASSWORD\\n valueFrom:\\n secretKeyRef:\\n name: mysecret\\n key: password\\n nodeSelector:\\n kubernetes.io/os: windows\\n\\n```\"}\n", | ||
"{\"prompt\": \"Write YAML that defines a Kubernetes Pod named \\\"my-empty-dir-pod\\\" using the v1 API version. The Pod runs a container based on the \\\"microsoft/windowsservercore:1709\\\" image with the same name. Two emptyDir volumes are defined: \\\"cache-volume\\\" mounted to \\\"/cache\\\" and \\\"scratch-volume\\\" mounted to \\\"C:/scratch\\\". The Pod is scheduled on a Windows node based on the nodeSelector configuration.\", \"completion\": \"```yaml\\napiVersion: v1\\nkind: Pod\\nmetadata:\\n name: my-empty-dir-pod\\nspec:\\n containers:\\n - image: microsoft/windowsservercore:1709\\n name: my-empty-dir-pod\\n volumeMounts:\\n - mountPath: /cache\\n name: cache-volume\\n - mountPath: C:/scratch\\n name: scratch-volume\\n volumes:\\n - name: cache-volume\\n emptyDir: {}\\n - name: scratch-volume\\n emptyDir: {}\\n nodeSelector:\\n kubernetes.io/os: windows\\n\\n```\"}\n", | ||
"{\"prompt\": \"Write YAML that defines a ConfigMap named \\\"example-config\\\" with two properties: \\\"example.property.1\\\" that has the value \\\"hello\\\" and \\\"example.property.2\\\" that has the value \\\"world\\\". Then define a Pod named \\\"configmap-pod\\\" that runs a container using the image \\\"redis:3.0-nanoserver\\\" on a Windows node. The container has two environment variables: \\\"EXAMPLE_PROPERTY_1\\\" that takes its value from \\\"example.property.1\\\" in the ConfigMap and \\\"EXAMPLE_PROPERTY_2\\\" that takes its value from \\\"example.property.2\\\" in the ConfigMap.\", \"completion\": \"```yaml\\nkind: ConfigMap\\napiVersion: v1\\nmetadata:\\n name: example-config\\ndata:\\n example.property.1: hello\\n example.property.2: world\\n\\n---\\n\\napiVersion: v1\\nkind: Pod\\nmetadata:\\n name: configmap-pod\\nspec:\\n containers:\\n - name: configmap-redis\\n image: redis:3.0-nanoserver\\n env:\\n - name: EXAMPLE_PROPERTY_1\\n valueFrom:\\n configMapKeyRef:\\n name: example-config\\n key: example.property.1\\n - name: EXAMPLE_PROPERTY_2\\n valueFrom:\\n configMapKeyRef:\\n name: example-config\\n key: example.property.2\\n nodeSelector:\\n kubernetes.io/os: windows\\n```\"}\n", | ||
"{\"prompt\": \"Write YAML that defines a Pod resource named \\\"run-as-username-container-demo\\\". The Pod has a security context with a windowsOptions field that sets the runAsUserName to \\\"ContainerUser\\\". It also has a container named \\\"run-as-username-demo\\\" with an image, command, and a security context that sets the runAsUserName to \\\"ContainerAdministrator\\\". The Pod is targeted to nodes with the label kubernetes.io/os set to \\\"windows\\\".\", \"completion\": \"```yaml\\napiVersion: v1\\nkind: Pod\\nmetadata:\\n name: run-as-username-container-demo\\nspec:\\n securityContext:\\n windowsOptions:\\n runAsUserName: \\\"ContainerUser\\\"\\n containers:\\n - name: run-as-username-demo\\n image: mcr.microsoft.com/windows/servercore:ltsc2019\\n command: [\\\"ping\\\", \\\"-t\\\", \\\"localhost\\\"]\\n securityContext:\\n windowsOptions:\\n runAsUserName: \\\"ContainerAdministrator\\\"\\n nodeSelector:\\n kubernetes.io/os: windows\\n\\n```\"}\n", | ||
"{\"prompt\": \"Write YAML that defines a Kubernetes Pod named \\\"run-as-username-pod-demo\\\". This Pod runs on a Windows operating system and uses the container image \\\"mcr.microsoft.com/windows/servercore:ltsc2019\\\". The Pod has a security context that specifies the user \\\"ContainerUser\\\" to run the container as. Additionally, the Pod has a node selector that ensures it is scheduled on a Windows node.\", \"completion\": \"```yaml\\napiVersion: v1\\nkind: Pod\\nmetadata:\\n name: run-as-username-pod-demo\\nspec:\\n securityContext:\\n windowsOptions:\\n runAsUserName: \\\"ContainerUser\\\"\\n containers:\\n - name: run-as-username-demo\\n image: mcr.microsoft.com/windows/servercore:ltsc2019\\n command: [\\\"ping\\\", \\\"-t\\\", \\\"localhost\\\"]\\n nodeSelector:\\n kubernetes.io/os: windows\\n\\n```\"}\n", | ||
"{\"prompt\": \"Write YAML that defines a Pod named \\\"hostpath-volume-pod\\\" using the v1 API version. The Pod runs the \\\"microsoft/windowsservercore:1709\\\" image, and mounts a volume named \\\"foo\\\" at the path \\\"C:\\\\etc\\\\foo\\\" with read-only access. It also specifies that the Pod should run on a node with the label \\\"kubernetes.io/os: windows\\\" and includes the volume configuration for \\\"foo\\\" using a hostPath with the path \\\"C:\\\\etc\\\\foo\\\".\", \"completion\": \"```yaml\\napiVersion: v1\\nkind: Pod\\nmetadata:\\n name: hostpath-volume-pod\\nspec:\\n containers:\\n - name: my-hostpath-volume-pod\\n image: microsoft/windowsservercore:1709\\n volumeMounts:\\n - name: foo\\n mountPath: \\\"C:\\\\\\\\etc\\\\\\\\foo\\\"\\n readOnly: true\\n nodeSelector:\\n kubernetes.io/os: windows\\n volumes:\\n - name: foo\\n hostPath:\\n path: \\\"C:\\\\\\\\etc\\\\\\\\foo\\\"\\n\\n```\"}\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"!head -n 10 { transformed_file_path }" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"jupytext": { | ||
"cell_metadata_filter": "-all", | ||
"main_language": "python", | ||
"notebook_metadata_filter": "-all" | ||
}, | ||
"kernelspec": { | ||
"display_name": "Python 3 (ipykernel)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.11.3" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |