From b3b81ce3e9f9e6f25b41f463577976628515384a Mon Sep 17 00:00:00 2001 From: Vinoo Ganesh Date: Fri, 8 Mar 2024 16:33:45 -0500 Subject: [PATCH 1/4] Update to new website --- .asf.yaml | 22 ++++++++++++- .github/workflows/deploy.yml | 10 +++--- README.md | 1 - config.toml | 25 ++++++++++----- content/en/_index.md | 31 +++++++++++++++++++ content/en/docs/Concepts/_index.md | 1 + .../File Format/Data Pages/compression.md | 1 - .../docs/File Format/Data Pages/encryption.md | 1 - content/en/docs/File Format/Types/_index.md | 1 + .../en/docs/File Format/Types/logicaltypes.md | 2 +- content/en/docs/File Format/configurations.md | 3 +- content/en/docs/File Format/metadata.md | 1 + go.mod | 5 +++ go.sum | 4 +++ 14 files changed, 90 insertions(+), 18 deletions(-) create mode 100644 content/en/_index.md create mode 100644 go.mod create mode 100644 go.sum diff --git a/.asf.yaml b/.asf.yaml index 79c2ea6..389b0c5 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ -1,6 +1,26 @@ +github: + description: "Apache Parquet" + homepage: https://parquet.apache.org/ + labels: + - parquet + - apache + - parquet-site + + enabled_merge_buttons: + merge: false + squash: true + rebase: false + + features: + wiki: false + issues: true + projects: false + collaborators: # Note: the number of collaborators is limited to 10 + - vinooganesh + staging: profile: ~ whoami: asf-staging publish: - whoami: asf-site + whoami: asf-site \ No newline at end of file diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 9113452..a236190 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -6,11 +6,11 @@ on: jobs: Build_and_Deploy_Site: - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 concurrency: group: ${{ github.workflow }}-${{ github.ref }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 with: submodules: recursive fetch-depth: 0 @@ -21,12 +21,12 @@ jobs: hugo-version: 'latest' extended: true - - uses: actions/setup-node@v2 + - uses: 
actions/setup-node@v4 with: - node-version: '16' + node-version: '20' - name: Cache dependencies - uses: actions/cache@v1 + uses: actions/cache@v4 with: path: ~/.npm key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') }} diff --git a/README.md b/README.md index cd21993..63829fc 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,6 @@ To create documentation for a new release of `parquet-mr` create a new }} + + Documentation + + + Download + +

Apache Parquet is a columnar storage format available to any project in the Hadoop ecosystem, regardless of the choice of data processing framework, data model or programming language.

+{{< blocks/link-down color="info" >}} +{{< /blocks/cover >}} + + +{{< blocks/section color="white" type="row">}} +{{% blocks/feature icon="fab fa-jira" title="File an Issue" url="https://issues.apache.org/jira/projects/PARQUET/issues" %}} +Or Search Open Issues +{{% /blocks/feature %}} + +{{% blocks/feature icon="fab fa-github" title="Contributions welcome!" url="https://github.com/apache/parquet-mr" %}} +We use a [Pull Request](https://github.com/apache/parquet-mr/pulls) contribution workflow on **GitHub**. New users are always welcome! +{{% /blocks/feature %}} + + +{{% blocks/feature icon="fab fa-twitter" title="Follow us on Twitter!" url="https://twitter.com/ApacheParquet" %}} +For announcements of the latest features, etc. +{{% /blocks/feature %}} + +{{% /blocks/section %}} \ No newline at end of file diff --git a/content/en/docs/Concepts/_index.md b/content/en/docs/Concepts/_index.md index ed32229..d55a2d3 100644 --- a/content/en/docs/Concepts/_index.md +++ b/content/en/docs/Concepts/_index.md @@ -5,6 +5,7 @@ weight: 4 description: > Glossary of relevant terminology. --- + - *Block (HDFS block)*: This means a block in HDFS and the meaning is unchanged for describing this file format. The file format is designed to work well on top of HDFS. 
diff --git a/content/en/docs/File Format/Data Pages/compression.md b/content/en/docs/File Format/Data Pages/compression.md index f448983..3217612 100644 --- a/content/en/docs/File Format/Data Pages/compression.md +++ b/content/en/docs/File Format/Data Pages/compression.md @@ -3,7 +3,6 @@ title: "Compression" linkTitle: "Compression" weight: 1 --- - ## Overview Parquet allows the data block inside dictionary pages and data pages to diff --git a/content/en/docs/File Format/Data Pages/encryption.md b/content/en/docs/File Format/Data Pages/encryption.md index e9fbd0f..1f736c5 100644 --- a/content/en/docs/File Format/Data Pages/encryption.md +++ b/content/en/docs/File Format/Data Pages/encryption.md @@ -3,7 +3,6 @@ title: "Parquet Modular Encryption" linkTitle: "Encryption" weight: 1 --- - Parquet files containing sensitive information can be protected by the modular encryption mechanism that encrypts and authenticates the file data and metadata - while allowing for a regular Parquet functionality (columnar projection, predicate pushdown, encoding diff --git a/content/en/docs/File Format/Types/_index.md b/content/en/docs/File Format/Types/_index.md index a079888..b07dc61 100644 --- a/content/en/docs/File Format/Types/_index.md +++ b/content/en/docs/File Format/Types/_index.md @@ -4,6 +4,7 @@ linkTitle: "Types" weight: 5 --- + The types supported by the file format are intended to be as minimal as possible, with a focus on how the types effect on disk storage. For example, 16-bit ints are not explicitly supported in the storage format since they are covered by diff --git a/content/en/docs/File Format/Types/logicaltypes.md b/content/en/docs/File Format/Types/logicaltypes.md index cd610a8..0173b75 100644 --- a/content/en/docs/File Format/Types/logicaltypes.md +++ b/content/en/docs/File Format/Types/logicaltypes.md @@ -10,4 +10,4 @@ of primitive types to a minimum and reuses parquet's efficient encodings. 
For example, strings are stored as byte arrays (binary) with a UTF8 annotation. These annotations define how to further decode and interpret the data. Annotations are stored as `LogicalType` fields in the file metadata and are -documented in LogicalTypes.md. +documented in LogicalTypes.md. \ No newline at end of file diff --git a/content/en/docs/File Format/configurations.md b/content/en/docs/File Format/configurations.md index 9e21955..f12be5d 100644 --- a/content/en/docs/File Format/configurations.md +++ b/content/en/docs/File Format/configurations.md @@ -5,6 +5,7 @@ weight: 5 --- ### Row Group Size + Larger row groups allow for larger column chunks which makes it possible to do larger sequential IO. Larger groups also require more buffering in the write path (or a two pass write). We recommend large row groups (512MB - 1GB). @@ -18,4 +19,4 @@ Data pages should be considered indivisible so smaller data pages allow for more fine grained reading (e.g. single row lookup). Larger page sizes incur less space overhead (less page headers) and potentially less parsing overhead (processing headers). Note: for sequential scans, it is not expected to read a page -at a time; this is not the IO chunk. We recommend 8KB for page sizes. \ No newline at end of file +at a time; this is not the IO chunk. We recommend 8KB for page sizes. diff --git a/content/en/docs/File Format/metadata.md b/content/en/docs/File Format/metadata.md index 0e5e19b..a2eae25 100644 --- a/content/en/docs/File Format/metadata.md +++ b/content/en/docs/File Format/metadata.md @@ -6,4 +6,5 @@ weight: 5 There are three types of metadata: file metadata, column (chunk) metadata and page header metadata. All thrift structures are serialized using the TCompactProtocol. 
+ ![File Layout](/images/FileFormat.gif) diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..90cb541 --- /dev/null +++ b/go.mod @@ -0,0 +1,5 @@ +module github.com/apache/parquet-site + +go 1.12 + +require github.com/google/docsy v0.9.1 // indirect diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..645c0da --- /dev/null +++ b/go.sum @@ -0,0 +1,4 @@ +github.com/FortAwesome/Font-Awesome v0.0.0-20240108205627-a1232e345536/go.mod h1:IUgezN/MFpCDIlFezw3L8j83oeiIuYoj28Miwr/KUYo= +github.com/google/docsy v0.9.1 h1:+jqges1YCd+yHeuZ1BUvD8V8mEGVtPxULg5j/vaJ984= +github.com/google/docsy v0.9.1/go.mod h1:saOqKEUOn07Bc0orM/JdIF3VkOanHta9LU5Y53bwN2U= +github.com/twbs/bootstrap v5.2.3+incompatible/go.mod h1:fZTSrkpSf0/HkL0IIJzvVspTt1r9zuf7XlZau8kpcY0= From 5904d8dbf988bc7b31f6a0b397caa5d41425cfe6 Mon Sep 17 00:00:00 2001 From: Vinoo Ganesh Date: Mon, 11 Mar 2024 08:29:31 -0400 Subject: [PATCH 2/4] Adding more cleanup into the same PR i# Changes to be committed: --- .gitmodules | 4 ---- content/search.md | 5 +++++ layouts/404.html | 15 ++++++--------- package.json | 4 ++-- themes/docsy | 1 - 5 files changed, 13 insertions(+), 16 deletions(-) delete mode 100644 .gitmodules create mode 100644 content/search.md delete mode 160000 themes/docsy diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index a1524f2..0000000 --- a/.gitmodules +++ /dev/null @@ -1,4 +0,0 @@ - -[submodule "themes/docsy"] - path = themes/docsy - url = https://github.com/google/docsy diff --git a/content/search.md b/content/search.md new file mode 100644 index 0000000..31b7cb3 --- /dev/null +++ b/content/search.md @@ -0,0 +1,5 @@ +--- +title: Search Results +layout: search + +--- \ No newline at end of file diff --git a/layouts/404.html b/layouts/404.html index 4087504..b962591 100644 --- a/layouts/404.html +++ b/layouts/404.html @@ -1,9 +1,6 @@ -{{ define "main"}} -
-
-

Not found

-

Oops! This page doesn't exist. Try going back to our home page.

- -
-
-{{ end }} +{{ define "main" -}} +
+

Not found

+

Oops! This page doesn't exist. Try going back to the home page.

+
+{{- end }} \ No newline at end of file diff --git a/package.json b/package.json index 67e9bbd..f81aadd 100644 --- a/package.json +++ b/package.json @@ -17,8 +17,8 @@ }, "homepage": "https://github.com/apache/parquet-site#readme", "devDependencies": { - "autoprefixer": "^10.4.4", - "postcss": "^8.4.12", + "autoprefixer": "^10.4.17", + "postcss": "^8.4.35", "postcss-cli": "^9.1.0" } } diff --git a/themes/docsy b/themes/docsy deleted file mode 160000 index 868b751..0000000 --- a/themes/docsy +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 868b75107c53196f25c6e57a3c704a556ef1f56e From 662980f00fb4066bf05ece12fa8a0dd1aff34c33 Mon Sep 17 00:00:00 2001 From: Vinoo Ganesh Date: Mon, 11 Mar 2024 08:59:45 -0400 Subject: [PATCH 3/4] Moving to hugo.toml --- config.toml => hugo.toml | 2 +- package.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) rename config.toml => hugo.toml (99%) diff --git a/config.toml b/hugo.toml similarity index 99% rename from config.toml rename to hugo.toml index 977dd3a..0e48bbd 100644 --- a/config.toml +++ b/hugo.toml @@ -190,4 +190,4 @@ enable = false min = "0.110.0" [[module.imports]] path = "github.com/google/docsy" - disable = false \ No newline at end of file + disable = false diff --git a/package.json b/package.json index f81aadd..a3e84f8 100644 --- a/package.json +++ b/package.json @@ -17,7 +17,7 @@ }, "homepage": "https://github.com/apache/parquet-site#readme", "devDependencies": { - "autoprefixer": "^10.4.17", + "autoprefixer": "^10.4.18", "postcss": "^8.4.35", "postcss-cli": "^9.1.0" } From c8ee0d493b2d17b344524a790b7fc0af04f3b341 Mon Sep 17 00:00:00 2001 From: Vinoo Ganesh Date: Mon, 11 Mar 2024 09:38:46 -0400 Subject: [PATCH 4/4] Right adjust layout --- layouts/partials/navbar.html | 101 +++++++++++++++++++++++------------ 1 file changed, 66 insertions(+), 35 deletions(-) diff --git a/layouts/partials/navbar.html b/layouts/partials/navbar.html index b20aec0..c0b643a 100644 --- a/layouts/partials/navbar.html +++ 
b/layouts/partials/navbar.html @@ -1,35 +1,66 @@ -{{ $cover := and (.HasShortcode "blocks/cover") (not .Site.Params.ui.navbar_translucent_over_cover_disable) }} - +{{ $cover := and + (.HasShortcode "blocks/cover") + (not .Site.Params.ui.navbar_translucent_over_cover_disable) +-}} +{{ $baseURL := urls.Parse $.Site.Params.Baseurl -}} + + \ No newline at end of file