From 3781e69b9dea3d0abcc5bbdeca6b6a7f37a39416 Mon Sep 17 00:00:00 2001
From: David Meza <david.meza-1@nasa.gov>
Date: Mon, 20 Aug 2018 12:24:52 -0400
Subject: [PATCH] initial upload

---
 dataNASAimport.cql | 66 ++++++++++++++++++++++++++++++++++++++++++++++
 gephi.cql          | 34 ++++++++++++++++++++++++
 2 files changed, 100 insertions(+)
 create mode 100644 dataNASAimport.cql
 create mode 100644 gephi.cql

diff --git a/dataNASAimport.cql b/dataNASAimport.cql
new file mode 100644
index 0000000..ac288bb
--- /dev/null
+++ b/dataNASAimport.cql
@@ -0,0 +1,66 @@
+//////////////////////////////////////////////////////////////////////////////////
+// Create Nodes and edges using apoc stored procedure
+
+WITH "https://data.nasa.gov/data.json" AS url
+CALL apoc.load.json(url) YIELD value
+UNWIND value.dataset AS dbs
+// Split the spatial field into at most 3 tokens; the first two are later stored as latitude and longitude.
+// The separator is a RUN of whitespace and/or commas, so "lat, lon" does not produce an empty middle token.
+WITH dbs, apoc.text.split(dbs.spatial, "[\\s,]+", 3) AS geo
+// Create (or reuse) the Dataset node keyed by identifier; the properties below are written only when the node is first created
+MERGE (dataset:Dataset {name:dbs.identifier}) ON CREATE
+	SET dataset.oid = dbs.`_id.$oid`,
+	dataset.type = dbs.`@type`,
+	dataset.accessLevel = dbs.accessLevel,
+	dataset.accrualPeriodicity = dbs.accrualPeriodicity,
+	dataset.bureauCode = dbs.bureauCode,
+	dataset.description = dbs.description,
+	// distribution is an array of objects (Project Open Data schema), so gather the non-null values from every entry
+	dataset.downloadURL = [d IN dbs.distribution WHERE d.downloadURL IS NOT NULL | d.downloadURL],
+	dataset.mediaType = [d IN dbs.distribution WHERE d.mediaType IS NOT NULL | d.mediaType],
+	dataset.issued = dbs.issued,
+	dataset.landingPage = dbs.landingPage,
+	dataset.language = dbs.language,
+	dataset.modified = dbs.modified,
+	dataset.programCode = dbs.programCode,
+	dataset.latitude = TOFLOAT(geo[0]), dataset.longitude = TOFLOAT(geo[1]),
+	dataset.temporal = dbs.temporal,
+	dataset.title = dbs.title,
+	dataset.license = dbs.license
+
+// Adds the isPartOf relationship without having to rerun the entire import.
+// Some entries carry an item called "isPartOf", naming the larger dataset they belong to.
+// FOREACH over a CASE-built list acts as a conditional: the list is [1] only for a non-null,
+// non-blank isPartOf, so the body runs at most once and never merges a Dataset with an empty name.
+FOREACH(ignoreMe IN CASE WHEN dbs.isPartOf IS NOT NULL AND trim(dbs.isPartOf) <> "" THEN [1] ELSE [] END | MERGE (aggregated_dataset:Dataset {name:dbs.isPartOf}) MERGE (dataset)-[:isPartOf]->(aggregated_dataset))
+
+
+
+// Point-of-contact node keyed by contactPoint.fn; the email is stored only when the node is first created
+MERGE (poc:ContactPoint {name: dbs.contactPoint.fn})
+	ON CREATE SET poc.email = dbs.contactPoint.hasEmail
+// Connect the dataset to its point of contact
+MERGE (dataset)-[:ForInfoContact]->(poc)
+
+// Each FOREACH has its own variable scope: a node bound inside one FOREACH is NOT visible in the next one.
+// Every block therefore re-MERGEs (by key) the nodes it connects, instead of silently creating blank nodes.
+FOREACH (keyWord IN dbs.keyword | MERGE (keyword:Keyword {name:keyWord}) MERGE (keyword)-[:KEYWORD_IN]->(dataset))
+// Publisher chain: publisher -> parent organization -> grandparent organization
+FOREACH(pub IN dbs.publisher.name | MERGE (publisher:Publisher {name:pub}) MERGE (publisher)-[:PUBLISHED]->(dataset))
+FOREACH(pubParent IN dbs.publisher.subOrganizationOf.name | MERGE (publisher:Publisher {name:dbs.publisher.name}) MERGE (parent:PublisherParent {name:pubParent}) MERGE (publisher)-[:subOrganizationOf]->(parent))
+FOREACH(pubParent2 IN dbs.publisher.subOrganizationOf.subOrganizationOf.name | MERGE (parent:PublisherParent {name:dbs.publisher.subOrganizationOf.name}) MERGE (grandparent:PublisherGrandParent {name:pubParent2}) MERGE (parent)-[:subOrganizationOf]->(grandparent))
+// Each theme is linked to the dataset and to every keyword of that dataset (nested FOREACH keeps theme in scope)
+FOREACH(t IN dbs.theme | MERGE (theme:Theme {name:t}) MERGE (dataset)-[:IN_THEME]->(theme) FOREACH(kw IN dbs.keyword | MERGE (keyword:Keyword {name:kw}) MERGE (keyword)-[:IN_THEME]->(theme)))
+
+
+
+
+/////////////////////////////////////////////
+// Do not run with import script ///////////
+// Closeness Centrality Procedure
+// NOTE(review): 'TYPE' looks like a placeholder relationship type - confirm the real type and direction before running
+MATCH (keyword:Keyword) WHERE id(keyword) % 2 = 0 // internal node id: Keyword nodes have no "id" property
+WITH collect(keyword) AS nodes
+CALL apoc.algo.closeness(['TYPE'],nodes,'INCOMING') YIELD node, score
+RETURN node AS keyword, score
+ORDER BY score DESC
\ No newline at end of file
diff --git a/gephi.cql b/gephi.cql
new file mode 100644
index 0000000..f16145d
--- /dev/null
+++ b/gephi.cql
@@ -0,0 +1,34 @@
+// Example
+// You can export your graph as an unweighted network.
+
+MATCH p = (:Person)-[:ACTED_IN]->(:Movie) // sample source: Person-to-Movie paths
+WITH p LIMIT 1000
+WITH collect(p) AS sampledPaths
+CALL apoc.gephi.add(null,'workspace0', sampledPaths) YIELD nodes, relationships, time
+RETURN nodes, relationships, time
+// You can export your graph as a weighted network by specifying the relationship property that holds the weight value.
+
+MATCH path = (:Person)-[r:ACTED_IN]->(:Movie) WHERE r.weightproperty IS NOT NULL // keep only relationships that carry the weight
+WITH path LIMIT 1000
+WITH collect(path) AS paths
+CALL apoc.gephi.add(null,'workspace0', paths, 'weightproperty') YIELD nodes, relationships, time
+RETURN nodes, relationships, time
+// You can also export other properties of your nodes and/or relationships along with the graph by adding an optional array of the property names to export. Example exporting the birthYear and role properties.
+
+MATCH path = (:Person)-[r:ACTED_IN]->(:Movie) WHERE r.weightproperty IS NOT NULL // keep only relationships that carry the weight
+WITH path LIMIT 1000
+WITH collect(path) AS paths
+CALL apoc.gephi.add(null,'workspace0', paths, 'weightproperty',['birthYear', 'role']) YIELD nodes, relationships, time
+RETURN nodes, relationships, time
+
+
+
+// The script below is the one that was actually used.
+
+MATCH p = (:Keyword)-->(:Dataset)-->(:Theme) // Keyword -> Dataset -> Theme chains
+WITH p LIMIT 500000
+WITH collect(p) AS chains
+CALL apoc.gephi.add(null,'workspace1', chains) YIELD nodes, relationships, time
+RETURN nodes, relationships, time
+
+