feat: Automatically load default Parquet files from public/artifacts …

…on app startup (#17)
noworneverev · Oct 22, 2024 · 382c8be · 382c8be
1 parent b35647a
commit 382c8be
Show file tree

Hide file tree

Showing 5 changed files with 276 additions and 134 deletions.
diff --git a/README.md b/README.md
@@ -6,14 +6,15 @@
 ![demo](public/demo.png)
 
 ## Overview
+
 GraphRAG Visualizer is an application designed to visualize Microsoft [GraphRAG](https://github.com/microsoft/graphrag) artifacts. By uploading parquet files generated from the GraphRAG indexing pipeline, users can easily view and analyze data without needing additional software or scripts.
 
 ## Features
+
 - **Graph Visualization**: View the graph in 2D or 3D in the "Graph Visualization" tab.
 - **Data Tables**: Display data from the parquet files in the "Data Tables" tab.
 - **Search Functionality**: Fully supports search, allowing users to focus on specific nodes or relationships.
 - **Local Processing**: All artifacts are processed locally on your machine, ensuring data security and privacy.
-
 
 ## Using the Search Functionality
 
@@ -22,27 +23,69 @@ Once the [graphrag-api](https://github.com/noworneverev/graphrag-api) server is
 ![search](public/search.png)
 
 ## Graph Data Model
-The logic for creating relationships for text units, documents, communities, and covariates is derived from the [GraphRAG import Neo4j Cypher notebook](https://github.com/microsoft/graphrag/blob/main/examples_notebooks/community_contrib/neo4j/graphrag_import_neo4j_cypher.ipynb).
 
+The logic for creating relationships for text units, documents, communities, and covariates is derived from the [GraphRAG import Neo4j Cypher notebook](https://github.com/microsoft/graphrag/blob/main/examples_notebooks/community_contrib/neo4j/graphrag_import_neo4j_cypher.ipynb).
 
 ### Nodes
 
-| Node | Type        |
-|-----------|--------------|
+| Node      | Type           |
+| --------- | -------------- |
 | Document  | `RAW_DOCUMENT` |
 | Text Unit | `CHUNK`        |
 | Community | `COMMUNITY`    |
 | Finding   | `FINDING`      |
 | Covariate | `COVARIATE`    |
-| Entity    | *Varies*       |
+| Entity    | _Varies_       |
 
 ### Relationships
 
-| Source Node | Relationship  | Target Node |
-|-------------|---------------|-------------|
-| Entity      | `RELATED`     | Entity      |
-| Text Unit   | `PART_OF`     | Document    |
-| Text Unit   | `HAS_ENTITY`  | Entity      |
-| Text Unit   | `HAS_COVARIATE` | Covariate |
-| Community   | `HAS_FINDING` | Finding     |
-| Entity      | `IN_COMMUNITY` | Community  |
+| Source Node | Relationship    | Target Node |
+| ----------- | --------------- | ----------- |
+| Entity      | `RELATED`       | Entity      |
+| Text Unit   | `PART_OF`       | Document    |
+| Text Unit   | `HAS_ENTITY`    | Entity      |
+| Text Unit   | `HAS_COVARIATE` | Covariate   |
+| Community   | `HAS_FINDING`   | Finding     |
+| Entity      | `IN_COMMUNITY`  | Community   |
+
+## Developer Instructions
+
+### Setting Up the Project
+
+1. Clone the repository to your local machine:
+
+   ```bash
+   git clone https://github.com/noworneverev/graphrag-visualizer.git
+   cd graphrag-visualizer
+   ```
+
+2. Install the necessary dependencies:
+
+   ```bash
+   npm install
+   ```
+
+3. Run the development server:
+
+   ```bash
+   npm start
+   ```
+
+4. Open the app in your browser:
+   ```
+   http://localhost:3000
+   ```
+
+### Loading Parquet Files
+
+To load `.parquet` files automatically when the application starts, place your Parquet files in the `public/artifacts` directory. These files will be loaded into the application for visualization and data table display. Only the following file names are looked for; any that are missing are skipped:
+
+- `public/artifacts/create_final_entities.parquet`
+- `public/artifacts/create_final_relationships.parquet`
+- `public/artifacts/create_final_documents.parquet`
+- `public/artifacts/create_final_text_units.parquet`
+- `public/artifacts/create_final_communities.parquet`
+- `public/artifacts/create_final_community_reports.parquet`
+- `public/artifacts/create_final_covariates.parquet`
+
+If the files are placed in the `public/artifacts` folder, the app will automatically load and display them on startup when it runs in development mode (`npm start`).
diff --git a/public/artifacts/put_parquet_files_here b/public/artifacts/put_parquet_files_here
diff --git a/src/app/components/GraphDataHandler.tsx b/src/app/components/GraphDataHandler.tsx
@@ -36,6 +36,7 @@ const GraphDataHandler: React.FC = () => {
     covariates,
     communityReports,
     handleFilesRead,
+    loadDefaultFiles,
   } = useFileHandler();
 
   const graphData = useGraphData(
@@ -57,6 +58,12 @@ const GraphDataHandler: React.FC = () => {
   const hasCommunities = communities.length > 0;
   const hasCovariates = covariates.length > 0;
 
+  useEffect(() => { // Auto-load the default Parquet files once the component has mounted
+    if (process.env.NODE_ENV === "development") { // only development builds auto-load; other builds skip this step
+      loadDefaultFiles(); // async function; the returned promise is not awaited here
+    }
+  }, []); // empty dependency array: the effect is not re-run on later renders
+
   useEffect(() => {
     if (entities.length > 0) {
       setTabIndex(1);

diff --git a/src/app/hooks/useFileHandler.ts b/src/app/hooks/useFileHandler.ts
@@ -8,6 +8,17 @@ import { CommunityReport } from "../models/community-report";
 import { Covariate } from "../models/covariate";
 import { readParquetFile } from "../utils/parquet-utils";
 
+// Default Parquet artifact URLs, built from PUBLIC_URL plus the /artifacts folder of the public directory.
+const defaultFiles = [ // loadDefaultFiles probes each entry; loadFiles uses the last path segment as the fileSchemas key
+  process.env.PUBLIC_URL + "/artifacts/create_final_entities.parquet",
+  process.env.PUBLIC_URL + "/artifacts/create_final_relationships.parquet",
+  process.env.PUBLIC_URL + "/artifacts/create_final_documents.parquet",
+  process.env.PUBLIC_URL + "/artifacts/create_final_text_units.parquet",
+  process.env.PUBLIC_URL + "/artifacts/create_final_communities.parquet",
+  process.env.PUBLIC_URL + "/artifacts/create_final_community_reports.parquet",
+  process.env.PUBLIC_URL + "/artifacts/create_final_covariates.parquet",
+];
+
 const fileSchemas: { [key: string]: string } = {
   "create_final_entities.parquet": "entity",
   "create_final_relationships.parquet": "relationship",
@@ -25,9 +36,15 @@ const useFileHandler = () => {
   const [textunits, setTextUnits] = useState<TextUnit[]>([]);
   const [communities, setCommunities] = useState<Community[]>([]);
   const [covariates, setCovariates] = useState<Covariate[]>([]);
-  const [communityReports, setCommunityReports] = useState<CommunityReport[]>([]);
+  const [communityReports, setCommunityReports] = useState<CommunityReport[]>(
+    []
+  );
 
   const handleFilesRead = async (files: File[]) => { // Entry point for user-supplied File objects
+    await loadFiles(files); // all parsing and state updates are delegated to loadFiles
+  };
+
+  const loadFiles = async (files: File[] | string[]) => {
     const entitiesArray: Entity[][] = [];
     const relationshipsArray: Relationship[][] = [];
     const documentsArray: Document[][] = [];
@@ -37,8 +54,32 @@ const useFileHandler = () => {
     const covariatesArray: Covariate[][] = [];
 
     for (const file of files) {
-      const schema = fileSchemas[file.name];
-      const data = await readParquetFile(file, schema);
+      const fileName =
+        typeof file === "string" ? file.split("/").pop()! : file.name;
+      const schema = fileSchemas[fileName];
+
+      let data;
+      if (typeof file === "string") {
+        // Fetch default file from public folder as binary data
+        const response = await fetch(file);
+        if (!response.ok) {
+          console.error(`Failed to fetch file ${file}: ${response.statusText}`);
+          continue;
+        }
+
+        // Convert ArrayBuffer to File object
+        const buffer = await response.arrayBuffer();
+        const blob = new Blob([buffer], { type: "application/x-parquet" });
+        const fileBlob = new File([blob], fileName);
+
+        // Use the File object in readParquetFile
+        data = await readParquetFile(fileBlob, schema);
+        // console.log(`Successfully loaded ${fileName} from public folder`);
+      } else {
+        // Handle drag-and-drop files directly
+        data = await readParquetFile(file, schema);
+        // console.log(`Successfully loaded ${file.name} from drag-and-drop`);
+      }
 
       if (schema === "entity") {
         entitiesArray.push(data);
@@ -57,21 +98,61 @@ const useFileHandler = () => {
       }
     }
 
-    const allEntities = entitiesArray.flat();
-    const allRelationships = relationshipsArray.flat();
-    const allDocuments = documentsArray.flat();
-    const allTextUnits = textUnitsArray.flat();
-    const allCommunities = communitiesArray.flat();
-    const allCommunityReports = communityReportsArray.flat();
-    const allCovariates = covariatesArray.flat();
-
-    setEntities(allEntities);
-    setRelationships(allRelationships);
-    setDocuments(allDocuments);
-    setTextUnits(allTextUnits);
-    setCommunities(allCommunities);
-    setCommunityReports(allCommunityReports);
-    setCovariates(allCovariates);
+    setEntities(entitiesArray.flat());
+    setRelationships(relationshipsArray.flat());
+    setDocuments(documentsArray.flat());
+    setTextUnits(textUnitsArray.flat());
+    setCommunities(communitiesArray.flat());
+    setCommunityReports(communityReportsArray.flat());
+    setCovariates(covariatesArray.flat());
+  };
+
+  const checkFileExists = async (filePath: string) => { // Probes filePath and resolves to true only for an OK response typed exactly application/octet-stream; every other outcome resolves to false
+    try {
+      const response = await fetch(filePath, {
+        method: "HEAD", // headers only; the file body is not downloaded for this probe
+        cache: "no-store", // do not answer the probe from the HTTP cache
+      });
+
+      if (response.ok) {
+        const contentType = response.headers.get("Content-Type"); // null when the header is absent
+
+        if (contentType === "application/octet-stream") { // strict equality: any other type, or the same type with extra parameters, is rejected
+          // OK status plus the expected Content-Type: treat the file as present.
+          console.log(`File exists: ${filePath}`);
+          return true;
+        } else { // NOTE(review): presumably catches a dev-server fallback that answers 200 with an HTML page for missing paths; confirm
+          console.warn(
+            `File does not exist or incorrect type: ${filePath} (Content-Type: ${contentType})`
+          );
+          return false;
+        }
+      } else { // non-OK status: reported as missing
+        console.warn(
+          `File does not exist: ${filePath} (status: ${response.status})`
+        );
+        return false;
+      }
+    } catch (error) { // errors thrown inside the try block (e.g. a rejected fetch) are logged and reported as missing
+      console.error(`Error checking file existence for ${filePath}`, error);
+      return false;
+    }
+  };
+
+  const loadDefaultFiles = async () => { // Probes every path in defaultFiles and loads the ones that pass checkFileExists
+    const filesToLoad = [];
+
+    for (const file of defaultFiles) {
+      const fileExists = await checkFileExists(file); // awaited inside the loop, so probes run one at a time in list order
+      if (fileExists) {
+        filesToLoad.push(file); // Add to load queue if the file exists
+      }
+    }
+    if (filesToLoad.length > 0) {
+      await loadFiles(filesToLoad); // one call for all files that were found
+    } else { // nothing found: loadFiles is not called, so no state setter runs
+      console.log("No default files found in the public folder.");
+    }
   };
 
   return {
@@ -83,6 +164,7 @@ const useFileHandler = () => {
     covariates,
     communityReports,
     handleFilesRead,
+    loadDefaultFiles,
   };
 };