diff --git a/.asf.yaml b/.asf.yaml index 79c2ea6..389b0c5 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ -1,6 +1,26 @@ +github: + description: "Apache Parquet" + homepage: https://parquet.apache.org/ + labels: + - parquet + - apache + - parquet-site + + enabled_merge_buttons: + merge: false + squash: true + rebase: false + + features: + wiki: false + issues: true + projects: false + collaborators: # Note: the number of collaborators is limited to 10 + - vinooganesh + staging: profile: ~ whoami: asf-staging publish: - whoami: asf-site + whoami: asf-site \ No newline at end of file diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 9113452..a236190 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -6,11 +6,11 @@ on: jobs: Build_and_Deploy_Site: - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 concurrency: group: ${{ github.workflow }}-${{ github.ref }} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 with: submodules: recursive fetch-depth: 0 @@ -21,12 +21,12 @@ jobs: hugo-version: 'latest' extended: true - - uses: actions/setup-node@v2 + - uses: actions/setup-node@v4 with: - node-version: '16' + node-version: '20' - name: Cache dependencies - uses: actions/cache@v1 + uses: actions/cache@v4 with: path: ~/.npm key: ${{ runner.os }}-node-${{ hashFiles('**/package-lock.json') }} diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index a1524f2..0000000 --- a/.gitmodules +++ /dev/null @@ -1,4 +0,0 @@ - -[submodule "themes/docsy"] - path = themes/docsy - url = https://github.com/google/docsy diff --git a/README.md b/README.md index cd21993..63829fc 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,6 @@ To create documentation for a new release of `parquet-mr` create a new }} + + Documentation + + + Download + +

Apache Parquet is a columnar storage format available to any project in the Hadoop ecosystem, regardless of the choice of data processing framework, data model or programming language.

+{{< blocks/link-down color="info" >}} +{{< /blocks/cover >}} + + +{{< blocks/section color="white" type="row">}} +{{% blocks/feature icon="fab fa-jira" title="File an Issue" url="https://issues.apache.org/jira/projects/PARQUET/issues" %}} +Or Search Open Issues +{{% /blocks/feature %}} + +{{% blocks/feature icon="fab fa-github" title="Contributions welcome!" url="https://github.com/apache/parquet-mr" %}} +We do a [Pull Request](https://github.com/apache/parquet-mr/pulls) contributions workflow on **GitHub**. New users are always welcome! +{{% /blocks/feature %}} + + +{{% blocks/feature icon="fab fa-twitter" title="Follow us on Twitter!" url="https://twitter.com/ApacheParquet" %}} +For announcement of latest features etc. +{{% /blocks/feature %}} + +{{% /blocks/section %}} \ No newline at end of file diff --git a/content/en/docs/Concepts/_index.md b/content/en/docs/Concepts/_index.md index ed32229..d55a2d3 100644 --- a/content/en/docs/Concepts/_index.md +++ b/content/en/docs/Concepts/_index.md @@ -5,6 +5,7 @@ weight: 4 description: > Glossary of relevant terminology. --- + - *Block (HDFS block)*: This means a block in HDFS and the meaning is unchanged for describing this file format. The file format is designed to work well on top of HDFS. diff --git a/content/en/docs/File Format/Data Pages/compression.md b/content/en/docs/File Format/Data Pages/compression.md index f448983..3217612 100644 --- a/content/en/docs/File Format/Data Pages/compression.md +++ b/content/en/docs/File Format/Data Pages/compression.md @@ -3,7 +3,6 @@ title: "Compression" linkTitle: "Compression" weight: 1 --- - ## Overview Parquet allows the data block inside dictionary pages and data pages to diff --git a/content/en/docs/File Format/Data Pages/encryption.md b/content/en/docs/File Format/Data Pages/encryption.md index e9fbd0f..1f736c5 100644 --- a/content/en/docs/File Format/Data Pages/encryption.md +++ b/content/en/docs/File Format/Data Pages/encryption.md @@ -3,7 +3,6 @@ title: "Parquet Modular Encryption" linkTitle: "Encryption" weight: 1 --- - Parquet files containing sensitive information can be protected by the modular encryption mechanism that encrypts and authenticates the file data and metadata - while allowing for a regular Parquet functionality (columnar projection, predicate pushdown, encoding diff --git a/content/en/docs/File Format/Types/_index.md b/content/en/docs/File Format/Types/_index.md index a079888..b07dc61 100644 --- a/content/en/docs/File Format/Types/_index.md +++ b/content/en/docs/File Format/Types/_index.md @@ -4,6 +4,7 @@ linkTitle: "Types" weight: 5 --- + The types supported by the file format are intended to be as minimal as possible, with a focus on how the types effect on disk storage. For example, 16-bit ints are not explicitly supported in the storage format since they are covered by diff --git a/content/en/docs/File Format/Types/logicaltypes.md b/content/en/docs/File Format/Types/logicaltypes.md index cd610a8..0173b75 100644 --- a/content/en/docs/File Format/Types/logicaltypes.md +++ b/content/en/docs/File Format/Types/logicaltypes.md @@ -10,4 +10,4 @@ of primitive types to a minimum and reuses parquet's efficient encodings. For example, strings are stored as byte arrays (binary) with a UTF8 annotation. These annotations define how to further decode and interpret the data. Annotations are stored as `LogicalType` fields in the file metadata and are -documented in LogicalTypes.md. +documented in LogicalTypes.md. \ No newline at end of file diff --git a/content/en/docs/File Format/configurations.md b/content/en/docs/File Format/configurations.md index 9e21955..f12be5d 100644 --- a/content/en/docs/File Format/configurations.md +++ b/content/en/docs/File Format/configurations.md @@ -5,6 +5,7 @@ weight: 5 --- ### Row Group Size + Larger row groups allow for larger column chunks which makes it possible to do larger sequential IO. Larger groups also require more buffering in the write path (or a two pass write). We recommend large row groups (512MB - 1GB). @@ -18,4 +19,4 @@ Data pages should be considered indivisible so smaller data pages allow for more fine grained reading (e.g. single row lookup). Larger page sizes incur less space overhead (less page headers) and potentially less parsing overhead (processing headers). Note: for sequential scans, it is not expected to read a page -at a time; this is not the IO chunk. We recommend 8KB for page sizes. \ No newline at end of file +at a time; this is not the IO chunk. We recommend 8KB for page sizes. diff --git a/content/en/docs/File Format/metadata.md b/content/en/docs/File Format/metadata.md index 0e5e19b..a2eae25 100644 --- a/content/en/docs/File Format/metadata.md +++ b/content/en/docs/File Format/metadata.md @@ -6,4 +6,5 @@ weight: 5 There are three types of metadata: file metadata, column (chunk) metadata and page header metadata. All thrift structures are serialized using the TCompactProtocol. + ![File Layout](/images/FileFormat.gif) diff --git a/content/search.md b/content/search.md new file mode 100644 index 0000000..31b7cb3 --- /dev/null +++ b/content/search.md @@ -0,0 +1,5 @@ +--- +title: Search Results +layout: search + +--- \ No newline at end of file diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..90cb541 --- /dev/null +++ b/go.mod @@ -0,0 +1,5 @@ +module github.com/apache/parquet-site + +go 1.12 + +require github.com/google/docsy v0.9.1 // indirect diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..645c0da --- /dev/null +++ b/go.sum @@ -0,0 +1,4 @@ +github.com/FortAwesome/Font-Awesome v0.0.0-20240108205627-a1232e345536/go.mod h1:IUgezN/MFpCDIlFezw3L8j83oeiIuYoj28Miwr/KUYo= +github.com/google/docsy v0.9.1 h1:+jqges1YCd+yHeuZ1BUvD8V8mEGVtPxULg5j/vaJ984= +github.com/google/docsy v0.9.1/go.mod h1:saOqKEUOn07Bc0orM/JdIF3VkOanHta9LU5Y53bwN2U= +github.com/twbs/bootstrap v5.2.3+incompatible/go.mod h1:fZTSrkpSf0/HkL0IIJzvVspTt1r9zuf7XlZau8kpcY0= diff --git a/config.toml b/hugo.toml similarity index 93% rename from config.toml rename to hugo.toml index 43cc577..0e48bbd 100644 --- a/config.toml +++ b/hugo.toml @@ -10,9 +10,6 @@ enableMissingTranslationPlaceholders = true enableRobotsTXT = true -# Base theme for website -theme = ["docsy"] - # Will give values to .Lastmod etc. enableGitInfo = true @@ -44,16 +41,17 @@ quality = 75 anchor = "smart" # Language configuration - [languages] [languages.en] -title = "Apache Parquet" languageName ="English" -contentDir = "content/en" # Weight used for sorting. weight = 1 [languages.en.params] +title = "Apache Parquet" description = "The Apache Parquet Website" +contentDir = "content/en" +# Weight used for sorting. +weight = 1 [markup] [markup.goldmark] @@ -98,6 +96,7 @@ url_latest_version = "https://parquet.apache.org" # Repository configuration (URLs for in-page links to opening issues and suggesting changes) github_repo = "https://github.com/apache/parquet-site" + github_branch= "production" # Comment out to disable search. @@ -118,7 +117,7 @@ prism_syntax_highlighting = false # Set to true to disable breadcrumb navigation. breadcrumb_disable = false # Set to true to disable the About link in the site footer -footer_about_disable = false +footer_about_enable = true # Set to false if you don't want to display a logo (/assets/icons/logo.svg) in the top navbar navbar_logo = true # Set to true if you don't want the top navbar to be translucent when over a `block/cover`, like on the homepage. @@ -155,7 +154,7 @@ enable = false name ="Twitter" url = "https://twitter.com/ApacheParquet" icon = "fab fa-twitter" - desc = "Follow us on Twitter to get the latest news" + desc = "Follow us on Twitter to get the latest news!" [[params.links.user]] name = "Stack Overflow" url = "https://stackoverflow.com/questions/tagged/parquet" @@ -166,7 +165,7 @@ enable = false name = "GitHub" url = "https://github.com/apache/parquet-mr" icon = "fab fa-github" - desc = "Development takes place here" + desc = "Development takes place here!" [[params.links.developer]] name = "Slack" url = "https://the-asf.slack.com/" @@ -182,3 +181,13 @@ enable = false url = "https://issues.apache.org/jira/projects/PARQUET/issues" icon = "fas fa-bug" desc = "File/Track Open Bugs" + +[module] + # Uncomment the next line to build and serve using local docsy clone declared in the named Hugo workspace: + # workspace = "docsy.work" + [module.hugoVersion] + extended = true + min = "0.110.0" + [[module.imports]] + path = "github.com/google/docsy" + disable = false diff --git a/layouts/404.html b/layouts/404.html index 4087504..b962591 100644 --- a/layouts/404.html +++ b/layouts/404.html @@ -1,9 +1,6 @@ -{{ define "main"}} -
-
-

Not found

-

Oops! This page doesn't exist. Try going back to our home page.

- -
-
-{{ end }} +{{ define "main" -}} +
+

Not found

+

Oops! This page doesn't exist. Try going back to the home page.

+
+{{- end }} \ No newline at end of file diff --git a/layouts/partials/navbar.html b/layouts/partials/navbar.html index b20aec0..c0b643a 100644 --- a/layouts/partials/navbar.html +++ b/layouts/partials/navbar.html @@ -1,35 +1,66 @@ -{{ $cover := and (.HasShortcode "blocks/cover") (not .Site.Params.ui.navbar_translucent_over_cover_disable) }} - +{{ $cover := and + (.HasShortcode "blocks/cover") + (not .Site.Params.ui.navbar_translucent_over_cover_disable) +-}} +{{ $baseURL := urls.Parse $.Site.Params.Baseurl -}} + + \ No newline at end of file diff --git a/package.json b/package.json index 67e9bbd..a3e84f8 100644 --- a/package.json +++ b/package.json @@ -17,8 +17,8 @@ }, "homepage": "https://github.com/apache/parquet-site#readme", "devDependencies": { - "autoprefixer": "^10.4.4", - "postcss": "^8.4.12", + "autoprefixer": "^10.4.18", + "postcss": "^8.4.35", "postcss-cli": "^9.1.0" } } diff --git a/themes/docsy b/themes/docsy deleted file mode 160000 index 868b751..0000000 --- a/themes/docsy +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 868b75107c53196f25c6e57a3c704a556ef1f56e