Merge pull request #280 from LBHackney-IT/simplify-upgrade
🧹 Simplify, upgrade, clean
spikeheap authored Oct 23, 2024
2 parents 8b5ce92 + 1480222 commit dc07af3
Showing 20 changed files with 4,463 additions and 4,576 deletions.
14 changes: 0 additions & 14 deletions _base.scss

This file was deleted.

13 changes: 7 additions & 6 deletions docs/docs/alloy-ingestion.md
@@ -44,12 +44,13 @@ As part of the refining process, the job checks for a json dictionary stored in
The json is of the form:


>{<br />
> "old_name1": "new_name1",<br />
> "old_name2": "new_name2",<br />
>...
>}
```json
{
"old_name1": "new_name1",
"old_name2": "new_name2",
...
}
```

If a key is not present in the table, no action is taken for it; similarly, a column that has no corresponding key remains unchanged.
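
As a minimal illustration of that behaviour (plain Python with made-up column names; the refining job's actual implementation is not shown here), applying the mapping leaves unmapped columns untouched and ignores keys that have no matching column:

```python
# Hypothetical rename mapping and column list, purely for illustration
rename_mapping = {"old_name1": "new_name1", "old_name2": "new_name2"}
table_columns = ["old_name1", "unrelated_column"]

# Keys absent from the table are ignored; columns without a key keep their name
renamed_columns = [rename_mapping.get(column, column) for column in table_columns]

print(renamed_columns)  # ['new_name1', 'unrelated_column']
```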

@@ -38,31 +38,27 @@ You can use whatever you want to make the Python Script. However to have it run
- `lambda_handler(event, lambda_context):` which acts as your main(), and interacts with the functions cell
- A cell with `lambda_handler("","")` and environment-variable setters to trigger the above. This cell will not be copied into your .py script when you come to create it in the Data Platform, but it is what triggers your lambda handler locally

<details><summary>Example Jupyter Script Format</summary>
<p>


```
import os
```
```
def print_text(text_string):
print(text_string)
```
```
def lambda_handler(event, lambda_context):
load_dotenv() # load environment variables
string_to_print = getenv("STRING_TO_PRINT")
print_text(string_to_print)
```
```
import os
os.environ["STRING_TO_PRINT"] = "Bacon" # Variable to be passed via Terraform
lambda_handler("","")
```

</p>
<details>
<summary>Example Jupyter Script Format</summary>
```
import os
```
```
def print_text(text_string):
print(text_string)
```
```
def lambda_handler(event, lambda_context):
load_dotenv() # load environment variables
string_to_print = getenv("STRING_TO_PRINT")
print_text(string_to_print)

```
```
import os
os.environ["STRING_TO_PRINT"] = "Bacon" # Variable to be passed via Terraform
lambda_handler("","")
```
</details>
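
For reference, here is a hedged sketch of what the final `.py` script might contain once the trigger cell is dropped. The `load_dotenv`/`getenv` imports are an assumption, since the notebook cells call them without showing where they come from:

```python
from os import getenv

from dotenv import load_dotenv  # assumed dependency; the cells above call load_dotenv() without an import


def print_text(text_string):
    print(text_string)


def lambda_handler(event, lambda_context):
    load_dotenv()  # load environment variables
    string_to_print = getenv("STRING_TO_PRINT")
    print_text(string_to_print)
```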

Once your script can output data from the API locally, the next step is to modify it to output to S3.
@@ -82,23 +78,24 @@ Once you have authenticated Boto3, let's use some AWS functionality

### Obtaining Secrets from the Secrets Manager

<details>
<summary>Code to get secrets from Secrets Manager</summary>
<p>


```python
# Imports added so the snippet runs standalone
import json
from os import getenv

import boto3

secrets_manager_client = boto3.client('secretsmanager')

secret_name = getenv("SECRET_NAME")

secret_manager_response = secrets_manager_client.get_secret_value(SecretId=secret_name)
api_credentials = json.loads(secret_manager_response['SecretString'])

api_key = api_credentials.get("api_key")
secret = api_credentials.get("secret")
```

</p>
</details>

1. Create a secrets manager client with boto3
@@ -113,50 +110,52 @@

You may want to list the files you have in an S3 bucket, either to determine what data you already have or to read a specific file.

<details>
<summary>Code to List folders</summary>
<p>

```python
import boto3

s3_client = boto3.client('s3')

def list_subfolders_in_directory(s3_client, bucket, directory):
    response = s3_client.list_objects_v2(
        Bucket=bucket,
        Prefix=directory,
        Delimiter="/")

    subfolders = response.get('CommonPrefixes')
    return subfolders
```


</p>
</details>

Returns a list of folders at a specific path.
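
A hedged usage sketch (the bucket and prefix below are placeholders): `CommonPrefixes` is a list of dictionaries, so each subfolder is read from its `Prefix` key, and the result is `None` when there are no subfolders.

```python
# Placeholder bucket and prefix, purely for illustration
subfolders = list_subfolders_in_directory(s3_client, "my-example-bucket", "landing/crm/")

if subfolders:
    for subfolder in subfolders:
        print(subfolder['Prefix'])
```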

<details>
<summary>Code to List Files</summary>
<p>

```python
import re

import boto3

s3_client = boto3.client('s3')
bucket = "Bucket name"
directory = "Path to where you want to list the files, ending with /"

def list_s3_files_in_folder_using_client(s3_client, bucket, directory):
    response = s3_client.list_objects_v2(Bucket=bucket, Prefix=directory)
    files = response.get("Contents")

    # Strip the folder prefix so each Key holds just the file name
    for file in files:
        file['Key'] = re.sub(string=file['Key'],
                             pattern=re.escape(directory),
                             repl="")

    # returns a list of dictionaries with file metadata
    return files
```

</p>
</details>

Returns a list of files at a specific path.
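
A hedged usage sketch building on the function above (using the placeholder `bucket` and `directory` variables already defined): each entry carries the `list_objects_v2` metadata, so you can, for example, pick out the most recently modified file.

```python
files = list_s3_files_in_folder_using_client(s3_client, bucket, directory)

if files:
    # Each entry is the list_objects_v2 metadata dictionary for one file
    latest_file = max(files, key=lambda file: file['LastModified'])
    print(latest_file['Key'], latest_file['LastModified'])
```
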
@@ -165,26 +164,27 @@

Here I will supply and explain two functions which will help you put files into S3

<details>
<summary>Output to Landing zone with Formatting</summary>
<p>

```python
import boto3
from datetime import date

s3_client = boto3.client('s3')

def output_to_landing_zone(s3_bucket, data, output_folder, filename):
    todays_date = date.today()

    # Zero-pad day and month so the partition paths sort correctly
    day = str(todays_date.day).zfill(2)
    month = str(todays_date.month).zfill(2)
    year = str(todays_date.year)

    return s3_client.put_object(
        Bucket=s3_bucket,
        Body=str(data),
        Key=f"{output_folder}/import_year={year}/import_month={month}/import_day={day}/import_date={todays_date}/{filename}.json")
```

</p>
</details>

So if you wanted to put a JSON file into the "**Sandbox**" bucket, with the data landing in that bucket's "**CRM**" folder, you would call the function along the lines of the hedged sketch below.
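
A hedged sketch of that call (the bucket name, payload, and filename are placeholders, not real values):

```python
import json

# Placeholder payload and filename, purely for illustration
data = {"records": []}

response = output_to_landing_zone(
    s3_bucket="sandbox",
    data=json.dumps(data),
    output_folder="CRM",
    filename="crm_snapshot",
)
```
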
@@ -38,11 +38,11 @@ Note: The instructions below assume an S3 Data Source and Target Location.
**Data source and data target (amongst other operations) must be set to be able to save the job. You can also apply _Transformations_ specific to your job via the Visual tab. See the [AWS Glue Studio Documentation][aws-glue-studio-documentation]**.
1. To complete the set up you need to select the **Job details** tab.
1. Complete the _Name_ and optional _Description_ fields. You may use this job as a template for repeat use, so a generic name to use as a template might be useful to begin with.
1. Select **dataplatform-{environment}-glue-role** where environment is either 'stg' or 'prod'; as the _IAM Role_ for the job.
1. Select **dataplatform-[environment]-glue-role** where environment is either 'stg' or 'prod'; as the _IAM Role_ for the job.
1. The remaining standard fields default values are usually fine to use. The programming language Glue Studio creates can be Scala or Python.
1. In _Advanced properties_ check the _Script filename_ refers to the task being carried out.
1. Set the _Script path_ to the central scripts S3 bucket: `s3://dataplatform-{environment}-glue-scripts/custom/` where environment is either 'stg' or 'prod' - you can create new folders or specify existing folders in the S3 bucket like this: `s3://dataplatform-{environment}-glue-scripts/custom/YOUR_FOLDER_NAME/`.
1. Set the _Temporary path_ to the central temp storage S3 bucket: `s3://dataplatform-{environment}-glue-temp-storage/` where environment is either 'stg' or 'prod'.
1. Set the _Script path_ to the central scripts S3 bucket: `s3://dataplatform-[environment]-glue-scripts/custom/` where environment is either 'stg' or 'prod' - you can create new folders or specify existing folders in the S3 bucket like this: `s3://dataplatform-[environment]-glue-scripts/custom/YOUR_FOLDER_NAME/`.
1. Set the _Temporary path_ to the central temp storage S3 bucket: `s3://dataplatform-[environment]-glue-temp-storage/` where environment is either 'stg' or 'prod'.
1. In _Security configuration_ select the appropriate security configuration for your target bucket location (for example for the Raw Zone, use **glue-job-security-configuration-to-raw**).
1. Ensure the _Server-side encryption_ option is **not checked**, so that it uses the buckets default encryption configuration.
1. In the _Tags_ section, add the key `PlatformDepartment` and set the value as the name of your department.
14 changes: 7 additions & 7 deletions docs/spikes/templates/template.md
@@ -5,9 +5,9 @@ tags: [tech-spike]
layout: layout
---

# Spike: {{ title }}
# Spike: [TITLE]

### SUB-TITLE
### [SUB-TITLE]

## Objective

@@ -17,14 +17,14 @@

### Postgres:

####Limitation
#### Limitation

####Suggestion
#### Suggestion

### Elasticsearch:

####Limitation
#### Limitation

####Suggestion
#### Suggestion

##Helpful Resources/Documentation
## Helpful Resources/Documentation
8 changes: 3 additions & 5 deletions docusaurus.config.js
@@ -37,7 +37,7 @@ module.exports = {
footer: {
style: "dark",
links: [],
copyright: `Made by HackIT.`,
copyright: `Made by Hackney's Data Analytics Platform Team.`,
},
},
presets: [
@@ -50,11 +50,9 @@ module.exports = {
editUrl: "https://github.com/LBHackney-IT/data-platform-playbook/edit/master/",
},
theme: {
customCss: [require.resolve("./src/docs.scss")],
customCss: './src/css/custom.css',
},
},
],
],
clientModules: [require.resolve("./src/docs.js")],
plugins: ["docusaurus-plugin-sass"],
]
};
29 changes: 12 additions & 17 deletions package.json
@@ -13,19 +13,18 @@
"format": "yarn prettier --write '**/*.{js,md}'"
},
"dependencies": {
"@docusaurus/core": "^2.4.1",
"@docusaurus/preset-classic": "^2.4.1",
"@mdx-js/react": "^1.6.21",
"bootstrap": "^4.6.0",
"clsx": "^1.1.1",
"docusaurus-plugin-sass": "^0.2.3",
"prettier": "^2.4.1",
"react": "^16.8.4",
"react-bootstrap": "^1.5.2",
"react-dom": "^16.8.4",
"sass": "^1.42.1",
"sirv": "^1.0.11",
"with": "^7.0.2"
"@docusaurus/core": "3.5.2",
"@docusaurus/preset-classic": "3.5.2",
"@mdx-js/react": "^3.0.0",
"clsx": "^2.0.0",
"prism-react-renderer": "^2.3.0",
"react": "^18.0.0",
"react-dom": "^18.0.0"
},
"devDependencies": {
"@docusaurus/module-type-aliases": "3.5.2",
"@docusaurus/types": "3.5.2",
"prettier": "^2.4.1"
},
"browserslist": {
"production": [
@@ -38,9 +37,5 @@
"last 1 firefox version",
"last 1 safari version"
]
},
"resolutions": {
"trim": "^0.0.3",
"got": "^11.8.5"
}
}