From c0f0665ee1067debc3cf723ed4e069385c7d0ad3 Mon Sep 17 00:00:00 2001 From: Weibin Zeng Date: Thu, 1 Aug 2024 16:50:57 +0800 Subject: [PATCH] chore(spark): Add reference document about how to integrate cloud storage and remove related dependencies (#572) Signed-off-by: acezen --- docs/libraries/spark/spark.md | 7 +++++++ maven-projects/spark/graphar/pom.xml | 28 ---------------------------- maven-projects/spark/pom.xml | 2 -- 3 files changed, 7 insertions(+), 30 deletions(-) diff --git a/docs/libraries/spark/spark.md b/docs/libraries/spark/spark.md index 7043005df..eaf79b8b9 100644 --- a/docs/libraries/spark/spark.md +++ b/docs/libraries/spark/spark.md @@ -225,6 +225,13 @@ For more information on usage, please refer to the examples: - [TransformExample.scala][transform-example] shows an example for graph data conversion between different file types or different adjList types. - [Neo4j2GraphAr.scala][neo4j2graphar] and [GraphAr2Neo4j.scala][graphar2neo4j] are examples to conduct data importing/exporting for Neo4j. +### Working with Cloud Storage (AWS S3, Aliyun OSS) + +The Spark library for GraphAr supports reading and writing data from/to cloud storage services such as AWS S3 and Aliyun OSS. To do so, you need to include the corresponding Hadoop cloud storage dependency (for example, `hadoop-aws` for S3 or `hadoop-aliyun` for OSS) in your project. See the reference documentation below for more details. 
+ +- [AWS S3](https://spark.apache.org/docs/latest/cloud-integration.html) +- [Aliyun OSS](https://hadoop.apache.org/docs/stable/hadoop-aliyun/tools/hadoop-aliyun/index.html) + [test-graph-info]: https://github.com/apache/incubator-graphar/blob/main/maven-projects/spark/graphar/src/test/scala/org/apache/graphar/TestGraphInfo.scala [test-index-generator]: https://github.com/apache/incubator-graphar/blob/main/maven-projects/spark/graphar/src/test/scala/org/apache/graphar/TestIndexGenerator.scala diff --git a/maven-projects/spark/graphar/pom.xml b/maven-projects/spark/graphar/pom.xml index 74626a620..9dc46eb49 100644 --- a/maven-projects/spark/graphar/pom.xml +++ b/maven-projects/spark/graphar/pom.xml @@ -88,34 +88,6 @@ snakeyaml 2.0 - - com.aliyun.odps - hadoop-fs-oss - ${cupid.sdk.version} - - - org.apache.hadoop - hadoop-common - - - - - com.aliyun.odps - odps-spark-datasource_2.11 - ${cupid.sdk.version} - - - net.jpountz.lz4 - lz4 - - - - - com.aliyun.odps - cupid-sdk - ${cupid.sdk.version} - provided - org.neo4j neo4j-connector-apache-spark_2.12 diff --git a/maven-projects/spark/pom.xml b/maven-projects/spark/pom.xml index e04ed4ae4..455fb1754 100644 --- a/maven-projects/spark/pom.xml +++ b/maven-projects/spark/pom.xml @@ -49,7 +49,6 @@ 3.2.2 1.8 1.8 - 3.3.8-public graphar @@ -72,7 +71,6 @@ 3.3.4 1.8 1.8 - 3.3.8-public graphar