Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat/cleanup #913

Draft
wants to merge 26 commits into
base: feature/federated_search
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion modules/server/.env.schema
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
ALLOW_CUSTOM_MAX_DOWNLOAD_ROWS=false
CONFIG_PATH=./configs
DATA_MASK_THRESHOLD=
DATA_MASK_MIN_THRESHOLD=
DEBUG=false
DOCUMENT_TYPE=''
DOWNLOAD_STREAM_BUFFER_SIZE=2000
Expand Down
2 changes: 1 addition & 1 deletion modules/server/src/app.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ export default async function (rootPath = '') {
* @param {boolean} enableAdmin
* @param {boolean} enabledDocumentHits - enables including "hits" property in the GQL response
*/
return Arranger({
return arranger({
enableAdmin: ENV_CONFIG.ENABLE_ADMIN,
enableDocumentHits: ENV_CONFIG.ENABLE_DOCUMENT_HITS,
}).then((router) => {
Expand Down
3 changes: 2 additions & 1 deletion modules/server/src/config/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ export const ALLOW_CUSTOM_MAX_DOWNLOAD_ROWS = stringToBool(
process.env.ALLOW_CUSTOM_MAX_DOWNLOAD_ROWS,
);
export const CONFIG_FILES_PATH = process.env.CONFIG_PATH || './configs';
export const DATA_MASK_THRESHOLD = process.env.DATA_MASK_THRESHOLD || Number.MAX_SAFE_INTEGER;
export const DATA_MASK_MIN_THRESHOLD =
process.env.DATA_MASK_MIN_THRESHOLD || Number.MAX_SAFE_INTEGER;
export const DEBUG_MODE = stringToBool(process.env.DEBUG);
export const DOCUMENT_TYPE = process.env.DOCUMENT_TYPE || '';
export const DOWNLOAD_STREAM_BUFFER_SIZE =
Expand Down
13 changes: 13 additions & 0 deletions modules/server/src/gqlServer.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import { Client } from '@elastic/elasticsearch';
import { GraphQLResolveInfo } from 'graphql';

/**
 * Shape of the context object passed as the third argument to resolvers
 * typed with `Resolver`.
 */
export type Context = {
// Elasticsearch client (`Client` from '@elastic/elasticsearch')
esClient: Client;
};

/**
 * Generic signature for a GraphQL resolver function.
 *
 * Type parameters:
 * - Root: type of the parent value received as the first argument
 * - QueryArgs: type of the field arguments received as the second argument
 * - ReturnValue: exact return type of the resolver; it is not wrapped in a
 *   Promise here, so async resolvers must supply `Promise<...>` themselves
 */
export type Resolver<Root, QueryArgs, ReturnValue> = (
root: Root,
args: QueryArgs,
context: Context,
info: GraphQLResolveInfo,
) => ReturnValue;
53 changes: 20 additions & 33 deletions modules/server/src/graphqlRoutes.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,13 @@ import { ApolloServer } from 'apollo-server-express';
import { Router } from 'express';
import expressPlayground from 'graphql-playground-middleware-express';

import getConfigObject, { initializeSets } from './config';
import { mergeSchemas } from '@graphql-tools/schema';
import getConfigObject, { ENV_CONFIG, initializeSets } from './config';
import { DEBUG_MODE, ES_PASS, ES_USER } from './config/constants';
import { ConfigProperties } from './config/types';
import { addMappingsToTypes, extendFields, fetchMapping } from './mapping';
import { extendColumns, extendFacets, flattenMappingToFields } from './mapping/extendMapping';
import { createSchemaFromNetworkConfig, mergeSchemas } from './network';
import { createSchemaFromNetworkConfig } from './network';
import makeSchema from './schema';

const getESMapping = async (esClient, index) => {
Expand Down Expand Up @@ -156,32 +157,13 @@ const noSchemaHandler =
});
};

const createEndpoint = async ({
esClient,
graphqlOptions = {},
mockSchema,
schema,
networkSchema,
}) => {
const createEndpoint = async ({ esClient, graphqlOptions = {}, mockSchema, schema }) => {
const mainPath = '/graphql';
const mockPath = '/mock/graphql';
const router = Router();

console.log('Starting GraphQL server:');

if (ENABLE_NETWORK_AGGREGATION) {
/**
* TODO: make available on one route
*/
const networkPath = '/network';
const apolloNetworkServer = new ApolloServer({
cache: 'bounded',
schema: networkSchema,
});
await apolloNetworkServer.start();
apolloNetworkServer.applyMiddleware({ app: router, path: networkPath });
}

try {
await router.get(
mainPath,
Expand Down Expand Up @@ -261,10 +243,11 @@ const createEndpoint = async ({
return router;
};

export const createSchemasFromConfigs = async ({
const createSchemasFromConfigs = async ({
configsSource = '',
enableAdmin,
enableDocumentHits,
enableNetworkAggregation,
esClient,
getServerSideFilter,
graphqlOptions = {},
Expand All @@ -287,28 +270,31 @@ export const createSchemasFromConfigs = async ({
types: typesWithMappings,
});

/**
const schemasToMerge = [schema];

/*
* Federated Network Search
*/
if (ENABLE_NETWORK_AGGREGATION) {
const { networkSchema } = await createSchemaFromNetworkConfig({
if (enableNetworkAggregation) {
const networkSchema = await createSchemaFromNetworkConfig({
networkConfigs: configsFromFiles[ConfigProperties.NETWORK_AGGREGATION].map((config) => ({
...config,
/**
/*
* part of the gql schema is generated dynamically
* in the case of the "file" field, the field name and type name are the same
* it's more flexible to define it here as an additional property than to confuse functions further down the pipeline
* in the case of the "file" field, the field name and gql type name are the same
*/
documentName: config.documentType,
})),
});
schemasToMerge.push(networkSchema);
}

const fullSchema = mergeSchemas({ schemas: schemasToMerge });

return {
...commonFields,
mockSchema,
schema,
networkSchema,
schema: fullSchema,
};
} catch (error) {
const message = error?.message || error;
Expand All @@ -323,16 +309,18 @@ export default async ({
configsSource = '',
enableAdmin,
enableDocumentHits,
enableNetworkAggregation,
esClient,
getServerSideFilter,
graphqlOptions = {},
}) => {
try {
const { fieldsFromMapping, mockSchema, schema, typesWithMappings, networkSchema } =
const { fieldsFromMapping, mockSchema, schema, typesWithMappings } =
await createSchemasFromConfigs({
configsSource,
enableAdmin,
enableDocumentHits,
enableNetworkAggregation,
esClient,
getServerSideFilter,
graphqlOptions,
Expand All @@ -343,7 +331,6 @@ export default async ({
graphqlOptions,
mockSchema,
schema,
networkSchema,
});

await initializeSets({ esClient });
Expand Down
4 changes: 2 additions & 2 deletions modules/server/src/index.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
export { default as getGraphQLRoutes, createSchemasFromConfigs } from './graphqlRoutes';
export { default } from './server';
export { default as App } from './app';
export { default as getGraphQLRoutes } from './graphqlRoutes';
export { default } from './server';
5 changes: 1 addition & 4 deletions modules/server/src/mapping/createConnectionResolvers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@ export type CreateConnectionResolversArgs = {
createStateResolvers?: boolean;
enableAdmin: boolean;
enableDocumentHits: boolean;
dataMaskThreshold: number;
getServerSideFilter?: GetServerSideFilterFn;
getServerSideFilter: GetServerSideFilterFn;
Parallel: any;
type: Record<string, any>;
};
Expand All @@ -20,7 +19,6 @@ const createConnectionResolvers: CreateConnectionResolversFn = ({
createStateResolvers = true,
enableAdmin,
enableDocumentHits,
dataMaskThreshold,
getServerSideFilter,
Parallel,
type,
Expand All @@ -30,7 +28,6 @@ const createConnectionResolvers: CreateConnectionResolversFn = ({
type,
Parallel,
getServerSideFilter,
dataMaskThreshold,
enableDocumentHits,
});

Expand Down
2 changes: 0 additions & 2 deletions modules/server/src/mapping/createConnectionTypeDefs.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@ export default ({ type, fields = '', createStateTypeDefs = true, showRecords })
aggregations_filter_themselves: Boolean
): ${type.name}Aggregations

${!showRecords ? 'dataMasking: DataMasking' : ''}

configs: ${createStateTypeDefs ? 'ConfigsWithState' : 'ConfigsWithoutState'}

hits(
Expand Down
32 changes: 21 additions & 11 deletions modules/server/src/mapping/masking.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,15 @@
import { Aggregation } from './types';
import { ENV_CONFIG } from '@/config';
import { Aggregation } from './resolveAggregations';

/**
 * Tag attached to each aggregation bucket by the masking code in this module.
 *
 * - `eq`: the bucket was at or above the masking threshold and its `doc_count` is left as-is
 * - `gte`: the bucket was under the masking threshold and its `doc_count` was replaced
 *
 * Declared as a const object plus a same-named type, so `Relation.gte` is usable
 * as a value and `Relation` as the union type `'eq' | 'gte'`.
 */
export const Relation = {
eq: 'eq',
gte: 'gte',
} as const;
// union of the keys above; the keys and values are identical strings
export type Relation = keyof typeof Relation;

/**
* This returns a total count that is less than or equal to the actual total hits in the query.
* It is calculated by adding +1 for values under threshold and bucket.doc_count
* It is calculated by adding +1 for each bucket under the threshold, or adding bucket.doc_count
* for buckets greater than or equal to the threshold
*
* @param aggregation an aggregation with the most buckets which has data masking applied
Expand All @@ -18,7 +25,8 @@ const calculateHitsFromAggregation = ({
return 0;
}
return aggregation.buckets.reduce(
(totalAcc, bucket) => (bucket.belowThreshold ? totalAcc + 1 : totalAcc + bucket.doc_count),
(totalAcc, bucket) =>
bucket.relation === Relation.gte ? totalAcc + 1 : totalAcc + bucket.doc_count,
0,
);
};
Expand All @@ -29,12 +37,10 @@ const calculateHitsFromAggregation = ({
* 2) Find the agg with the most bucket count and data masking applied to be used in calculating hits.total
*
* @param aggregations - aggregations from query
* @param thresholdMin - threshold value
* @returns aggregations with data masking applied and hits total
*/
export const applyAggregationMasking = ({
aggregations,
thresholdMin,
}: {
aggregations: Record<
string,
Expand All @@ -43,13 +49,16 @@ export const applyAggregationMasking = ({
buckets: Array<{
doc_count: number;
key: string;
relation: Relation;
}>;
}
>;
thresholdMin: number;
}) => {
// set data masked properties to one less than the configured threshold value (under threshold)
const THRESHOLD_REPLACEMENT_VALUE = thresholdMin - 1;
const thresholdMin = ENV_CONFIG.DATA_MASK_MIN_THRESHOLD;
if (thresholdMin < 1) {
throw Error('DATA_MASK_MIN_THRESHOLD environment variable has to be a positive integer.');
}
const THRESHOLD_REPLACEMENT_VALUE = 1;

const { aggsTotal: dataMaskedAggregations, totalHitsAgg } = Object.entries(aggregations).reduce<{
aggsTotal: Record<string, Aggregation>;
Expand All @@ -59,12 +68,13 @@ export const applyAggregationMasking = ({
// mask buckets if under threshold
const dataMaskedBuckets = aggregation.buckets.map((bucket) =>
bucket.doc_count < thresholdMin
? { ...bucket, doc_count: THRESHOLD_REPLACEMENT_VALUE, belowThreshold: true }
: { ...bucket, belowThreshold: false },
? { ...bucket, doc_count: THRESHOLD_REPLACEMENT_VALUE, relation: Relation.gte }
: { ...bucket, relation: Relation.eq },
);

// update total hits selected agg if needed
const bucketIsMasked = dataMaskedBuckets.some((bucket) => bucket.belowThreshold);
const bucketIsMasked = dataMaskedBuckets.some((bucket) => bucket.relation === Relation.gte);
// take aggregation with the most buckets that has masked data
const hitsAgg =
totalHitsAgg.bucketCount < aggregation.bucket_count && bucketIsMasked
? { key: type, bucketCount: aggregation.bucket_count }
Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,46 @@
import { Resolver } from '@/gqlServer';
import { GetServerSideFilterFn } from '@/utils/getDefaultServerSideFilter';
import getFields from 'graphql-fields';

import { buildAggregations, buildQuery, flattenAggregations } from '../middleware';

import { resolveSetsInSqon } from './hackyTemporaryEsSetResolution';
import { Relation } from './masking';
import compileFilter from './utils/compileFilter';
import esSearch from './utils/esSearch';

export default ({ type, getServerSideFilter }) => {
return async (
/*
* GQL query types
*
* Arguments received by the aggregations resolver (its `QueryArgs`).
*/
type GQLAggregationQueryFilters = {
// presumably a SQON filter object sent by the client; untyped here. TODO confirm and narrow from `any`
filters: any;
aggregations_filter_themselves: boolean;
// NOTE(review): the resolver destructures this with a default of `true`,
// which suggests callers may omit it; consider marking it optional
include_missing: boolean;
};

/*
* Types
*/
/**
 * A single aggregation bucket as returned by the aggregations resolver.
 *
 * NOTE(review): `Relation` is imported from './masking', and masking appears to
 * import `Aggregation` back from this module. Both are type-only usages, so
 * confirm they stay erased at compile time (e.g. via `import type`) to avoid
 * a runtime import cycle.
 */
export type Bucket = {
// number of documents reported for this bucket
doc_count: number;
// bucket key
key: string;
// how `doc_count` relates to the underlying count (see `Relation` in './masking')
relation: Relation;
};

/**
 * One aggregation result: its buckets plus a bucket count.
 */
export type Aggregation = {
// presumably the number of buckets in this aggregation; verify against `flattenAggregations`
bucket_count: number;
buckets: Bucket[];
};

// Map of aggregation results keyed by field name; this is what the aggregations resolver resolves to.
type Aggregations = Record<string, Aggregation>;

const resolveAggregations = ({
type,
getServerSideFilter,
}: {
type: { index: string; nested_fieldNames: string[] };
getServerSideFilter: GetServerSideFilterFn;
}) => {
const resolver: Resolver<unknown, GQLAggregationQueryFilters, Promise<Aggregations>> = async (
obj,
{ filters, aggregations_filter_themselves, include_missing = true },
context,
Expand All @@ -26,7 +59,7 @@ export default ({ type, getServerSideFilter }) => {
nestedFieldNames,
filters: compileFilter({
clientSideFilter: resolvedFilter,
serverSideFilter: getServerSideFilter(context),
serverSideFilter: getServerSideFilter(),
Copy link
Contributor Author

@ciaranschutte ciaranschutte Dec 15, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this doesn't appear to be needed? As far as I read, es client is only thing in context and the filter ops shouldn't be used directly in that. thoughts? - Jon, Justin @

}),
});

Expand All @@ -48,19 +81,27 @@ export default ({ type, getServerSideFilter }) => {
const response = await esSearch(esClient)({
index: type.index,
size: 0,
// @ts-expect-error - valid search query parameter in ES 7.17, not in types
_source: false,
body,
});

const aggregations = flattenAggregations({
aggregations: response.aggregations,
includeMissing: include_missing,
});

return aggregations;
};
return resolver;
};

const toGraphqlField = (acc, [a, b]) => ({ ...acc, [a.replace(/\./g, '__')]: b });
export const aggregationsToGraphql = (aggregations) => {
return Object.entries(aggregations).reduce(toGraphqlField, {});
export default resolveAggregations;

/**
 * Reducer step: returns a copy of the accumulator with one more aggregation
 * added under a GraphQL-safe key, where every "." in the field name is
 * replaced by "__".
 */
const toGraphqlField = (
  acc: Aggregations,
  [fieldName, aggregation]: [string, Aggregation],
) => {
  const graphqlKey = fieldName.replace(/\./g, '__');
  return { ...acc, [graphqlKey]: aggregation };
};

/**
 * Re-keys an aggregations map so dotted field names become GraphQL-safe
 * names (e.g. "a.b" becomes "a__b"). The aggregation values themselves are
 * passed through by reference, untouched.
 */
export const aggregationsToGraphql = (aggregations: Aggregations) =>
  Object.entries(aggregations).reduce<Aggregations>(toGraphqlField, {});
Loading