chore(spark): Add reference document about how to integrate cloud sto…

…rage and remove related dependencies (#572) Signed-off-by: acezen <[email protected]>
apache · Aug 1, 2024 · c0f0665 · c0f0665
1 parent 4a1ad5b
commit c0f0665
Show file tree

Hide file tree

Showing 3 changed files with 7 additions and 30 deletions.
diff --git a/docs/libraries/spark/spark.md b/docs/libraries/spark/spark.md
@@ -225,6 +225,13 @@ For more information on usage, please refer to the examples:
 - [TransformExample.scala][transform-example] shows an example for graph data conversion between different file types or different adjList types.
 - [Neo4j2GraphAr.scala][neo4j2graphar] and [GraphAr2Neo4j.scala][graphar2neo4j] are examples to conduct data importing/exporting for Neo4j.
 
+### Working with Cloud Storage (AWS S3, Aliyun OSS)
+
+The Spark library for GraphAr supports reading and writing data from/to cloud storage services such as AWS S3 and Aliyun OSS. To do so, you need to include the corresponding Hadoop connector dependency in your project (for example, `hadoop-aws` for AWS S3 or `hadoop-aliyun` for Aliyun OSS). See the reference documentation below for more details.
+
+- [AWS S3](https://spark.apache.org/docs/latest/cloud-integration.html)
+- [Aliyun OSS](https://hadoop.apache.org/docs/stable/hadoop-aliyun/tools/hadoop-aliyun/index.html)
+
 
 [test-graph-info]: https://github.com/apache/incubator-graphar/blob/main/maven-projects/spark/graphar/src/test/scala/org/apache/graphar/TestGraphInfo.scala
 [test-index-generator]: https://github.com/apache/incubator-graphar/blob/main/maven-projects/spark/graphar/src/test/scala/org/apache/graphar/TestIndexGenerator.scala

diff --git a/maven-projects/spark/graphar/pom.xml b/maven-projects/spark/graphar/pom.xml
@@ -88,34 +88,6 @@
             <artifactId>snakeyaml</artifactId>
             <version>2.0</version>
         </dependency>
-        <dependency>
-            <groupId>com.aliyun.odps</groupId>
-            <artifactId>hadoop-fs-oss</artifactId>
-            <version>${cupid.sdk.version}</version>
-            <exclusions>
-                <exclusion>
-                    <groupId>org.apache.hadoop</groupId>
-                    <artifactId>hadoop-common</artifactId>
-                </exclusion>
-            </exclusions>
-        </dependency>
-        <dependency>
-            <groupId>com.aliyun.odps</groupId>
-            <artifactId>odps-spark-datasource_2.11</artifactId>
-            <version>${cupid.sdk.version}</version>
-            <exclusions>
-                <exclusion>
-                    <groupId>net.jpountz.lz4</groupId>
-                    <artifactId>lz4</artifactId>
-                </exclusion>
-            </exclusions>
-        </dependency>
-        <dependency>
-            <groupId>com.aliyun.odps</groupId>
-            <artifactId>cupid-sdk</artifactId>
-            <version>${cupid.sdk.version}</version>
-            <scope>provided</scope>
-        </dependency>
         <dependency>
             <groupId>org.neo4j</groupId>
             <artifactId>neo4j-connector-apache-spark_2.12</artifactId>

diff --git a/maven-projects/spark/pom.xml b/maven-projects/spark/pom.xml
@@ -49,7 +49,6 @@
                 <spark.version>3.2.2</spark.version>
                 <maven.compiler.source>1.8</maven.compiler.source>
                 <maven.compiler.target>1.8</maven.compiler.target>
-                <cupid.sdk.version>3.3.8-public</cupid.sdk.version>
             </properties>
             <modules>
                 <module>graphar</module>
@@ -72,7 +71,6 @@
                 <spark.version>3.3.4</spark.version>
                 <maven.compiler.source>1.8</maven.compiler.source>
                 <maven.compiler.target>1.8</maven.compiler.target>
-                <cupid.sdk.version>3.3.8-public</cupid.sdk.version>
             </properties>
             <modules>
                 <module>graphar</module>