From 13e800ff75a426f611c586736b6d46b106ef9ced Mon Sep 17 00:00:00 2001 From: James Greenhill Date: Tue, 30 Jan 2024 18:22:08 -0800 Subject: [PATCH 1/3] add logic to only backup one shard - in progress --- housewatch/clickhouse/backups.py | 11 ++++++++--- housewatch/clickhouse/table.py | 6 ++++++ 2 files changed, 14 insertions(+), 3 deletions(-) create mode 100644 housewatch/clickhouse/table.py diff --git a/housewatch/clickhouse/backups.py b/housewatch/clickhouse/backups.py index 54bdc3e..a353639 100644 --- a/housewatch/clickhouse/backups.py +++ b/housewatch/clickhouse/backups.py @@ -4,6 +4,7 @@ from typing import Dict, Optional from uuid import uuid4 from housewatch.clickhouse.client import run_query +from housewatch.clickhouse.table import is_replicated_table from housewatch.models.backup import ScheduledBackup, ScheduledBackupRun from housewatch.clickhouse.clusters import get_node_per_shard @@ -15,7 +16,7 @@ logger = structlog.get_logger(__name__) -def execute_backup_on_shards( +def execute_backup( query: str, params: Dict[str, str | int] = {}, query_settings: Dict[str, str | int] = {}, @@ -25,6 +26,7 @@ def execute_backup_on_shards( aws_key: Optional[str] = None, aws_secret: Optional[str] = None, base_backup: Optional[str] = None, + is_replicated: bool = False, ): """ This function will execute a backup on each shard in a cluster @@ -57,6 +59,8 @@ def execute_backup_on_shards( item[key[0]] = res[index] response.append(item) responses.append((shard, response)) + if is_replicated: + break return response @@ -87,7 +91,7 @@ def create_table_backup(database, table, bucket, path, cluster=None, aws_key=Non QUERY = """BACKUP TABLE %(database)s.%(table)s TO S3('https://%(bucket)s.s3.amazonaws.com/%(path)s/%(shard)s', '%(aws_key)s', '%(aws_secret)s') ASYNC""" - return execute_backup_on_shards( + return execute_backup( QUERY, { "database": database, @@ -102,6 +106,7 @@ def create_table_backup(database, table, bucket, path, cluster=None, aws_key=Non aws_key=aws_key, aws_secret=aws_secret, base_backup=base_backup, + is_replicated=is_replicated_table(database, table), ) QUERY = """BACKUP TABLE %(database)s.%(table)s TO S3('https://%(bucket)s.s3.amazonaws.com/%(path)s', '%(aws_key)s', '%(aws_secret)s') @@ -133,7 +138,7 @@ def create_database_backup(database, bucket, path, cluster=None, aws_key=None, a TO S3('https://%(bucket)s.s3.amazonaws.com/%(path)s/%(shard)s', '%(aws_key)s', '%(aws_secret)s') ASYNC""" - return execute_backup_on_shards( + return execute_backup( QUERY, { "database": database, diff --git a/housewatch/clickhouse/table.py b/housewatch/clickhouse/table.py new file mode 100644 index 0000000..e977173 --- /dev/null +++ b/housewatch/clickhouse/table.py @@ -0,0 +1,6 @@ +from housewatch.clickhouse.client import run_query + + +def is_replicated_table(database, table): + QUERY = """SELECT is_replicated FROM system.tables WHERE database = '%(database)s' AND name = '%(table)s'""" + return "replicated" in run_query(QUERY, {"database": database, "table": table})[0]["engine"].lower() From 8bf6eca5a75c4ca44dedd6979770ae4985c0ca10 Mon Sep 17 00:00:00 2001 From: James Greenhill Date: Tue, 30 Jan 2024 22:47:38 -0800 Subject: [PATCH 2/3] add model for is_sharded and only backup one shard if the table is not sharded --- frontend/src/pages/Backups/Backups.tsx | 13 ++++- .../src/pages/Backups/ScheduledBackups.tsx | 50 ++++++++++++------- housewatch/clickhouse/backups.py | 15 +++--- housewatch/clickhouse/table.py | 17 ++++++- ...011_scheduledbackup_is_sharded_and_more.py | 23 +++++++++ housewatch/models/backup.py | 1 + 6 files changed, 93 insertions(+), 26 deletions(-) create mode 100644 housewatch/migrations/0011_scheduledbackup_is_sharded_and_more.py diff --git a/frontend/src/pages/Backups/Backups.tsx b/frontend/src/pages/Backups/Backups.tsx index 7ea1aed..36b662b 100644 --- a/frontend/src/pages/Backups/Backups.tsx +++ b/frontend/src/pages/Backups/Backups.tsx @@ -1,7 +1,7 @@ import React, { useEffect, useState } from 'react' import { usePollingEffect } from '../../utils/usePollingEffect' import { ColumnType } from 'antd/es/table' -import { Table, Button, Form, Input, Modal, Tag, Col, Progress, Row, Tooltip, notification } from 'antd' +import { Table, Button, Form, Input, Checkbox, Modal, Tag, Col, Progress, Row, Tooltip, notification } from 'antd' import useSWR, { mutate } from 'swr' interface BackupRow { @@ -29,6 +29,7 @@ type FieldType = { table?: string bucket?: string path?: string + is_sharded?: boolean aws_access_key_id?: string aws_secret_access_key?: string } @@ -165,6 +166,16 @@ export default function Backups() { + + label="Is Sharded" + name="is_sharded" + initialValue="false" + valuePropName="checked" + rules={[{ required: true, message: 'Is this table sharded?' }]} + > + is sharded + + label="S3 Bucket" name="bucket" diff --git a/frontend/src/pages/Backups/ScheduledBackups.tsx b/frontend/src/pages/Backups/ScheduledBackups.tsx index 275ce3e..d09e9e5 100644 --- a/frontend/src/pages/Backups/ScheduledBackups.tsx +++ b/frontend/src/pages/Backups/ScheduledBackups.tsx @@ -1,9 +1,23 @@ import React, { useEffect, useState } from 'react' import { usePollingEffect } from '../../utils/usePollingEffect' import { ColumnType } from 'antd/es/table' -import { Switch, Select, Table, Button, Form, Input, Modal, Tag, Col, Progress, Row, Tooltip, notification } from 'antd' -import DeleteOutlined from '@ant-design/icons' -import EditOutlined from '@ant-design/icons' +import { + Switch, + Select, + Table, + Button, + Form, + Input, + Checkbox, + Modal, + Tag, + Col, + Progress, + Row, + Tooltip, + notification, +} from 'antd' +import { DeleteOutlined, EditOutlined } from '@ant-design/icons' import { Clusters } from '../Clusters/Clusters' import useSWR, { mutate } from 'swr' @@ -32,6 +46,7 @@ type FieldType = { incremental_schedule?: string database?: string table?: string + is_sharded?: boolean bucket?: string path?: string aws_access_key_id?: string @@ -153,12 +168,12 @@ export default function ScheduledBackups() { { title: 'Last Run Time', dataIndex: 'last_run_time' }, { title: 'Database', dataIndex: 'database' }, { title: 'Table', dataIndex: 'table' }, + { title: 'Is Sharded', dataIndex: 'is_sharded', render: (_, sched) => (sched.is_sharded ? 'Yes' : 'No') }, { title: 'S3 Location', dataIndex: 'bucket', render: (_, sched) => 's3://' + sched.bucket + '/' + sched.path }, - { title: 'Created At', dataIndex: 'created_at' }, { - title: '', + title: 'Actions', dataIndex: 'id', - render: id => { + render: (id: string, rowData: ScheduleRow) => { const deleteBackup = async () => { try { const res = await fetch(`/api/scheduled_backups/${id}`, { @@ -173,21 +188,10 @@ export default function ScheduledBackups() { } } - return ( - - - - ) - }, - }, - { - title: 'Actions', - dataIndex: 'id', - render: (id: string, rowData: ScheduleRow) => { return ( <> handleEdit(rowData)} /> - {/* handleDelete(id)} /> */} + deleteBackup()} style={{ marginLeft: '15px' }} /> ) }, @@ -285,6 +289,16 @@ export default function ScheduledBackups() { + + label="Is Sharded" + name="is_sharded" + initialValue="false" + valuePropName="checked" + rules={[{ required: true, message: 'Is this table sharded?' }]} + > + is sharded + + label="S3 Bucket" name="bucket" diff --git a/housewatch/clickhouse/backups.py b/housewatch/clickhouse/backups.py index a353639..063fdc3 100644 --- a/housewatch/clickhouse/backups.py +++ b/housewatch/clickhouse/backups.py @@ -4,8 +4,8 @@ from typing import Dict, Optional from uuid import uuid4 from housewatch.clickhouse.client import run_query -from housewatch.clickhouse.table import is_replicated_table from housewatch.models.backup import ScheduledBackup, ScheduledBackupRun +from housewatch.clickhouse.table import table_engine_full from housewatch.clickhouse.clusters import get_node_per_shard from django.conf import settings @@ -26,7 +26,7 @@ def execute_backup( aws_key: Optional[str] = None, aws_secret: Optional[str] = None, base_backup: Optional[str] = None, - is_replicated: bool = False, + is_sharded: bool = False, ): """ This function will execute a backup on each shard in a cluster @@ -59,8 +59,8 @@ def execute_backup( item[key[0]] = res[index] response.append(item) responses.append((shard, response)) - if is_replicated: - break + if not is_sharded: + return response return response @@ -82,7 +82,9 @@ def get_backup(backup, cluster=None): return run_query(QUERY, {"uuid": backup}, use_cache=False) -def create_table_backup(database, table, bucket, path, cluster=None, aws_key=None, aws_secret=None, base_backup=None): +def create_table_backup( + database, table, bucket, path, cluster=None, aws_key=None, aws_secret=None, base_backup=None, is_sharded=False +): if aws_key is None or aws_secret is None: aws_key = settings.AWS_ACCESS_KEY_ID aws_secret = settings.AWS_SECRET_ACCESS_KEY @@ -106,7 +108,7 @@ def create_table_backup(database, table, bucket, path, cluster=None, aws_key=Non aws_key=aws_key, aws_secret=aws_secret, base_backup=base_backup, - is_replicated=is_replicated_table(database, table), + is_sharded=is_sharded, ) QUERY = """BACKUP TABLE %(database)s.%(table)s TO S3('https://%(bucket)s.s3.amazonaws.com/%(path)s', '%(aws_key)s', '%(aws_secret)s') @@ -204,6 +206,7 @@ def run_backup(backup_id, incremental=False): backup.aws_access_key_id, backup.aws_secret_access_key, base_backup=base_backup, + is_sharded=backup.is_sharded, ) uuid = str(uuid4()) br = ScheduledBackupRun.objects.create( diff --git a/housewatch/clickhouse/table.py b/housewatch/clickhouse/table.py index e977173..cb89af2 100644 --- a/housewatch/clickhouse/table.py +++ b/housewatch/clickhouse/table.py @@ -2,5 +2,20 @@ def is_replicated_table(database, table): - QUERY = """SELECT is_replicated FROM system.tables WHERE database = '%(database)s' AND name = '%(table)s'""" + QUERY = """SELECT engine FROM system.tables WHERE database = '%(database)s' AND name = '%(table)s'""" return "replicated" in run_query(QUERY, {"database": database, "table": table})[0]["engine"].lower() + + +def table_engine_full(database, table): + QUERY = """SELECT engine_full FROM system.tables WHERE database = '%(database)s' AND name = '%(table)s'""" + return run_query(QUERY, {"database": database, "table": table})[0]["engine_full"] + + +def parse_engine(engine_full): + engine = engine_full.split("(")[0].strip() + params = engine_full.split("(")[1].split(")")[0].split(",") + return engine, params + + +def is_sharded_table(database, table): + return "sharded" in table_engine_full(database, table).lower() diff --git a/housewatch/migrations/0011_scheduledbackup_is_sharded_and_more.py b/housewatch/migrations/0011_scheduledbackup_is_sharded_and_more.py new file mode 100644 index 0000000..ae32402 --- /dev/null +++ b/housewatch/migrations/0011_scheduledbackup_is_sharded_and_more.py @@ -0,0 +1,23 @@ +# Generated by Django 4.1.1 on 2024-01-31 06:36 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('housewatch', '0010_scheduledbackup_incremental_schedule_and_more'), + ] + + operations = [ + migrations.AddField( + model_name='scheduledbackup', + name='is_sharded', + field=models.BooleanField(default=False), + ), + migrations.AlterField( + model_name='scheduledbackup', + name='table', + field=models.CharField(blank=True, max_length=255, null=True), + ), + ] diff --git a/housewatch/models/backup.py b/housewatch/models/backup.py index b512b00..c19b61e 100644 --- a/housewatch/models/backup.py +++ b/housewatch/models/backup.py @@ -19,6 +19,7 @@ class ScheduledBackup(models.Model): incremental_schedule: models.CharField = models.CharField(max_length=255, null=True) table: models.CharField = models.CharField(max_length=255, null=True, blank=True) database: models.CharField = models.CharField(max_length=255) + is_sharded: models.BooleanField = models.BooleanField(default=False) cluster: models.CharField = models.CharField(max_length=255, null=True) bucket: models.CharField = models.CharField(max_length=255) path: models.CharField = models.CharField(max_length=255) From 65d71501affd5c79758ee476a52623c0a5d31579 Mon Sep 17 00:00:00 2001 From: James Greenhill Date: Tue, 30 Jan 2024 22:57:29 -0800 Subject: [PATCH 3/3] ts fixes --- frontend/src/pages/Backups/ScheduledBackups.tsx | 4 +++- frontend/src/pages/Overview/Overview.tsx | 2 +- frontend/src/pages/QueryEditor/QueryEditor.tsx | 2 +- frontend/src/pages/QueryEditor/SavedQueries.tsx | 2 +- frontend/src/pages/SlowQueries/MetricsTab.tsx | 2 +- 5 files changed, 7 insertions(+), 5 deletions(-) diff --git a/frontend/src/pages/Backups/ScheduledBackups.tsx b/frontend/src/pages/Backups/ScheduledBackups.tsx index d09e9e5..439c3bc 100644 --- a/frontend/src/pages/Backups/ScheduledBackups.tsx +++ b/frontend/src/pages/Backups/ScheduledBackups.tsx @@ -17,7 +17,8 @@ import { Tooltip, notification, } from 'antd' -import { DeleteOutlined, EditOutlined } from '@ant-design/icons' +import DeleteOutlined from '@ant-design/icons/DeleteOutlined' +import EditOutlined from '@ant-design/icons/EditOutlined' import { Clusters } from '../Clusters/Clusters' import useSWR, { mutate } from 'swr' @@ -30,6 +31,7 @@ interface ScheduleRow { schedule: string incremental_schedule: string table: string + is_sharded: boolean database: string bucket: string path: string diff --git a/frontend/src/pages/Overview/Overview.tsx b/frontend/src/pages/Overview/Overview.tsx index a8f3a44..c28939b 100644 --- a/frontend/src/pages/Overview/Overview.tsx +++ b/frontend/src/pages/Overview/Overview.tsx @@ -1,7 +1,7 @@ import React, { useEffect, useState } from 'react' import { Line } from '@ant-design/charts' import { Card, Col, Row, Tooltip, notification } from 'antd' -import InfoCircleOutlined from '@ant-design/icons' +import InfoCircleOutlined from '@ant-design/icons/InfoCircleOutlined' import { clickhouseTips } from './tips' import useSWR from 'swr' diff --git a/frontend/src/pages/QueryEditor/QueryEditor.tsx b/frontend/src/pages/QueryEditor/QueryEditor.tsx index 5d0b5d7..66919ba 100644 --- a/frontend/src/pages/QueryEditor/QueryEditor.tsx +++ b/frontend/src/pages/QueryEditor/QueryEditor.tsx @@ -6,7 +6,7 @@ import 'prismjs/components/prism-sql' import 'prismjs/themes/prism.css' import Editor from 'react-simple-code-editor' import { v4 as uuidv4 } from 'uuid' -import SaveOutlined from '@ant-design/icons' +import SaveOutlined from '@ant-design/icons/SaveOutlined' function CreateSavedQueryModal({ modalOpen = false, diff --git a/frontend/src/pages/QueryEditor/SavedQueries.tsx b/frontend/src/pages/QueryEditor/SavedQueries.tsx index 30a9320..d1ab985 100644 --- a/frontend/src/pages/QueryEditor/SavedQueries.tsx +++ b/frontend/src/pages/QueryEditor/SavedQueries.tsx @@ -2,7 +2,7 @@ import { Table, Button, Row, Col, Tooltip } from 'antd' import React, { useEffect, useState } from 'react' import { ColumnType } from 'antd/es/table' import SavedQuery from './SavedQuery' -import ReloadOutlined from '@ant-design/icons' +import ReloadOutlined from '@ant-design/icons/ReloadOutlined' import { useHistory } from 'react-router-dom' import { isoTimestampToHumanReadable } from '../../utils/dateUtils' diff --git a/frontend/src/pages/SlowQueries/MetricsTab.tsx b/frontend/src/pages/SlowQueries/MetricsTab.tsx index f744bab..b76ed18 100644 --- a/frontend/src/pages/SlowQueries/MetricsTab.tsx +++ b/frontend/src/pages/SlowQueries/MetricsTab.tsx @@ -2,7 +2,7 @@ import React, { useEffect, useState } from 'react' import { Line } from '@ant-design/plots' // @ts-ignore import { Card, Col, Row, Tooltip, notification } from 'antd' -import InfoCircleOutlined from '@ant-design/icons' +import InfoCircleOutlined from '@ant-design/icons/InfoCircleOutlined' import { NoDataSpinner, QueryDetailData } from './QueryDetail' export default function MetricsTab({ query_hash }: { query_hash: string }) {