Skip to content

Commit

Permalink
Extraction graph from yaml (#34)
Browse files Browse the repository at this point in the history
* extraction graph from yaml

* version bump
  • Loading branch information
lucasjacks0n authored May 15, 2024
1 parent 0b9f008 commit fedd5ca
Show file tree
Hide file tree
Showing 6 changed files with 129 additions and 56 deletions.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "getindexify",
"version": "0.0.42",
"version": "0.0.43",
"description": "This is the TypeScript client for interacting with the Indexify service.",
"main": "./dist/index.js",
"module": "./dist/index.mjs",
Expand Down
54 changes: 54 additions & 0 deletions src/ExtractionGraph.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import { IExtractionPolicy } from "./types";
import yaml from "yaml";

/**
 * A named set of extraction policies.
 *
 * Instances can be built directly, from a plain object (`fromDict`), or from
 * a YAML document (`fromYaml`), and serialized back to a plain object with
 * `toDict`.
 */
class ExtractionGraph {
  id?: string;
  name: string;
  namespace?: string;
  extraction_policies: IExtractionPolicy[];

  /**
   * @param id - Optional graph identifier.
   * @param name - Graph name.
   * @param namespace - Optional namespace the graph belongs to.
   * @param extraction_policies - Policies that make up the graph.
   */
  constructor({
    id,
    name,
    namespace,
    extraction_policies,
  }: {
    id?: string;
    name: string;
    namespace?: string;
    extraction_policies: IExtractionPolicy[];
  }) {
    this.id = id;
    this.name = name;
    this.namespace = namespace;
    this.extraction_policies = extraction_policies;
  }

  /**
   * Builds a graph from a plain object.
   *
   * Only `id`, `name` and `extraction_policies` are read; a `namespace` key,
   * if present, is ignored, so the resulting graph has no namespace set.
   * The input object is left untouched.
   *
   * @param json - Object carrying the graph fields.
   * @returns A new ExtractionGraph.
   */
  static fromDict(json: Record<string, any>): ExtractionGraph {
    // Pick the fields we need rather than deleting `namespace` from the
    // caller's object: the caller may still be using it.
    const { id, name, extraction_policies } = json;
    return new ExtractionGraph({ id, name, extraction_policies });
  }

  /**
   * Builds a graph from a YAML document.
   *
   * @param spec - YAML text whose top level is a mapping of graph fields.
   * @returns A new ExtractionGraph.
   * @throws TypeError when the document does not parse to a mapping
   *   (for example an empty document, a scalar, or a list).
   */
  static fromYaml(spec: string): ExtractionGraph {
    const parsed: unknown = yaml.parse(spec);
    // An empty document parses to null and a bare scalar to a primitive;
    // neither can carry graph fields, so fail with a clear message.
    if (
      typeof parsed !== "object" ||
      parsed === null ||
      Array.isArray(parsed)
    ) {
      throw new TypeError(
        "ExtractionGraph YAML spec must be a mapping at the top level"
      );
    }
    return ExtractionGraph.fromDict(parsed as Record<string, any>);
  }

  /**
   * Serializes the graph to a plain object, omitting every field whose value
   * is null or undefined.
   *
   * @returns A new object holding the graph's set fields.
   */
  toDict(): Record<string, any> {
    const filteredDict: Record<string, any> = {};
    for (const [key, value] of Object.entries(this)) {
      if (value !== null && value !== undefined) {
        filteredDict[key] = value;
      }
    }
    return filteredDict;
  }
}

export default ExtractionGraph;
53 changes: 27 additions & 26 deletions src/client.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ import Extractor from "./extractor";
import {
IContentMetadata,
IExtractor,
IExtractionGraph,
IIndex,
INamespace,
ITask,
Expand All @@ -20,20 +19,21 @@ import {
} from "./types";
import { v4 as uuidv4 } from "uuid";
import CryptoJS from "crypto-js";
import ExtractionGraph from "./ExtractionGraph";

const DEFAULT_SERVICE_URL = "http://localhost:8900"; // Set your default service URL

class IndexifyClient {
public serviceUrl: string;
private client: AxiosInstance;
public namespace: string;
public extractionGraphs: IExtractionGraph[];
public extractionGraphs: ExtractionGraph[];

constructor(
serviceUrl: string = DEFAULT_SERVICE_URL,
namespace: string = "default",
// optional mtls config
extractionGraphs: IExtractionGraph[],
extractionGraphs: ExtractionGraph[],
httpsAgent?: any
) {
this.serviceUrl = serviceUrl;
Expand Down Expand Up @@ -62,13 +62,17 @@ class IndexifyClient {
return new IndexifyClient(
serviceUrl,
namespace,
response.data.namespace.extraction_graphs.map((graph: { extraction_policies: any[]; }) => ({
...graph,
extraction_policies: graph.extraction_policies.map((policy: { filters_eq: any; }) => ({
...policy,
labels_eq: policy.filters_eq, // Transform filters_eq to labels_eq
}))
})),
response.data.namespace.extraction_graphs.map(
(graph: { extraction_policies: any[] }) => ({
...graph,
extraction_policies: graph.extraction_policies.map(
(policy: { filters_eq: any }) => ({
...policy,
labels_eq: policy.filters_eq, // Transform filters_eq to labels_eq
})
),
})
),
IndexifyClient.getHttpsAgent({ mtlsConfig })
);
}
Expand Down Expand Up @@ -161,7 +165,7 @@ class IndexifyClient {
mtlsConfig,
}: {
name: string;
extractionGraphs?: IExtractionGraph[];
extractionGraphs?: ExtractionGraph[];
labels?: Record<string, string>;
mtlsConfig?: IMtlsConfig;
}) {
Expand Down Expand Up @@ -205,17 +209,14 @@ class IndexifyClient {
}

async createExtractionGraph(
name: string,
extractionPolicies: IExtractionPolicy | IExtractionPolicy[]
extractionGraph: ExtractionGraph
): Promise<IAddExtractorGraphResponse> {
const policiesArray = Array.isArray(extractionPolicies)
? extractionPolicies
: [extractionPolicies];

const resp = await this.client.post("extraction_graphs", {
name,
extraction_policies: policiesArray,
});
const data = {
name: extractionGraph.name,
extraction_policies: extractionGraph.extraction_policies,
}
console.log("create extraction graph", JSON.stringify(data.extraction_policies));
const resp = await this.client.post("extraction_graphs", data);

// refresh this.extractionGraphs so it includes the newly created graph
await this.getExtractionGraphs();
Expand Down Expand Up @@ -364,15 +365,15 @@ class IndexifyClient {
Object.keys(labels).forEach((key) => {
formData.append(key, labels[key]);
});

// Upload File
const res = await this.client.post("upload_file", formData, {
headers: {
...formData.getHeaders(),
},
params,
});
return res.data.content_id
return res.data.content_id;
} else {
// browser
if (!isBlob(fileInput)) {
Expand All @@ -390,13 +391,13 @@ class IndexifyClient {

// Upload File
const res = await this.client.post("/upload_file", formData, {
params
params,
});
return res.data.content_id
return res.data.content_id;
}
}

async getExtractionGraphs(): Promise<IExtractionGraph[]> {
async getExtractionGraphs(): Promise<ExtractionGraph[]> {
const resp = await this.client.get("");
const extractionGraphs = resp.data.namespace?.extraction_graphs ?? [];
this.extractionGraphs = extractionGraphs;
Expand Down
4 changes: 2 additions & 2 deletions src/index.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import ExtractionGraph from "./ExtractionGraph";
import IndexifyClient from "./client";
import Extractor from "./extractor";
import {
Expand All @@ -8,7 +9,6 @@ import {
IIndex,
IContentMetadata,
IExtractedMetadata,
IExtractionGraph,
IExtractionPolicy,
ISearchIndexResponse,
ITask,
Expand All @@ -29,7 +29,7 @@ export {
IIndex,
IContentMetadata,
IExtractedMetadata,
IExtractionGraph,
ExtractionGraph,
IExtractionPolicy,
ISearchIndexResponse,
ITask,
Expand Down
13 changes: 4 additions & 9 deletions src/types.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import ExtractionGraph from "./ExtractionGraph";

export interface INamespace {
name: string;
extraction_graphs: IExtractionGraph[];
extraction_graphs: ExtractionGraph[];
}

export interface IEmbeddingSchema {
Expand Down Expand Up @@ -64,21 +66,14 @@ export interface IExtractedMetadata {
extractor_name: string;
}

export interface IExtractionGraph {
id: string;
name: string;
namespace: string;
extraction_policies: IExtractionPolicy[];
}

export interface IExtractionPolicy {
id?: string;
extractor: string;
name: string;
labels_eq?: string;
input_params?: Record<string, string | number>;
content_source?: string;
graph_name: string;
graph_name?: string;
}

export interface ITaskContentMetadata {
Expand Down
59 changes: 41 additions & 18 deletions tests/client.test.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { IndexifyClient } from "../src";
import ExtractionGraph from "../src/ExtractionGraph";
import { IExtractionPolicy } from "../src/types";
import { isAxiosError } from "axios";

Expand All @@ -23,16 +24,15 @@ async function setupExtractionGraph(
extractor: string
): Promise<string[]> {
const nanoid = generateNanoId(8);
const extractionPolicy: IExtractionPolicy = {
extractor,
name: `extractor.${nanoid}`,
graph_name: extractionGraphName
};
const resp = await client.createExtractionGraph(
extractionGraphName,
extractionPolicy
);
return resp.indexes

const graph = ExtractionGraph.fromYaml(`
name: '${extractionGraphName}'
extraction_policies:
- extractor: '${extractor}'
name: 'extractor.${nanoid}'
`);
const resp = await client.createExtractionGraph(graph);
return resp.indexes;
}

test("createClient", async () => {
Expand Down Expand Up @@ -119,8 +119,8 @@ test("searchIndex", async () => {
extractionGraphName,
"tensorlake/minilm-l6"
);
expect(indexes.length).toBe(1)

expect(indexes.length).toBe(1);
await client.addDocuments(extractionGraphName, [
{ text: "This is a test1", labels: { source: "test" } },
{ text: "This is a test2", labels: { source: "test" } },
Expand Down Expand Up @@ -198,7 +198,10 @@ test("getStructuredMetadata", async () => {
"tensorlake/wikipedia"
);

const contentId = await client.uploadFile(extractionGraphName, `${__dirname}/files/steph_curry.html`);
const contentId = await client.uploadFile(
extractionGraphName,
`${__dirname}/files/steph_curry.html`
);
await new Promise((r) => setTimeout(r, 10000));
const extractedMetadata = await client.getStructuredMetadata(contentId);
expect(extractedMetadata.length).toBeGreaterThanOrEqual(1);
Expand All @@ -215,16 +218,18 @@ test("getSchemas", async () => {
extractionGraphName,
"tensorlake/wikipedia"
);

// upload html
await client.uploadFile(extractionGraphName, `${__dirname}/files/steph_curry.html`);
await client.uploadFile(
extractionGraphName,
`${__dirname}/files/steph_curry.html`
);
await new Promise((r) => setTimeout(r, 10000));


const schemas = await client.getSchemas();
expect(schemas.length).toBe(1);
expect(schemas[0].extraction_graph_name).toBe(extractionGraphName)
expect(Object.keys(schemas[0].columns).length).toBe(13)
expect(schemas[0].extraction_graph_name).toBe(extractionGraphName);
expect(Object.keys(schemas[0].columns).length).toBe(13);
});

test("downloadContent", async () => {
Expand Down Expand Up @@ -321,6 +326,24 @@ test("extract", async () => {
expect(content.features?.[0].data.title).toBe("Stephen Curry");
});

// Parsing a two-policy YAML spec should yield a graph that keeps the name and
// both policies, and that has no id assigned.
test("extractionGraph from yaml", async () => {
const spec = `
name: 'nbakb'
extraction_policies:
- extractor: 'tensorlake/chunk-extractor'
name: 'chunker'
input_params:
chunk_size: 1000
overlap: 100
- extractor: 'tensorlake/minilm-l6'
name: 'wikiembedding'
content_source: 'chunker'
`;
const parsedGraph = ExtractionGraph.fromYaml(spec);
const { extraction_policies: policies } = parsedGraph;
expect(policies).toHaveLength(2);
expect(parsedGraph.id).toBeUndefined();
expect(parsedGraph.name).toBe("nbakb");
});

test("generateHashFromString", async () => {
const client = await IndexifyClient.createClient();
expect(client.generateHashFromString("test")).toBe("9f86d081884c7d65");
Expand Down

0 comments on commit fedd5ca

Please sign in to comment.