Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DO NOT MERGE] Add validateValueType function to ensure data type integrity #1002

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@ const networkSourceData = {
CIDR: '255.255.255.0',
name: 'My Network',
notInDataModel: 'Not In Data Model',
owner: { name: 'Bob' },
summary: [{ title: 'Summary' }, { description: 'Description' }],
};

const networkResourceEntity = {
Expand Down Expand Up @@ -118,20 +116,6 @@ describe('createIntegrationEntity', () => {
expect('notWhitelisted' in entity).toBe(false);
});

test('ignore source properties that are object types', () => {
const entity = createIntegrationEntity({
entityData,
});
expect('owner' in entity).toBe(false);
});

test('ignore source properties that are object[] types', () => {
const entity = createIntegrationEntity({
entityData,
});
expect('summary' in entity).toBe(false);
});

test('handles empty tags in source data', () => {
const entity = createIntegrationEntity({
entityData: {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
/* eslint-disable no-console */
import { validateValueType } from '../createIntegrationEntity';

describe('validateValueType', () => {
test('should accept string type', () => {
expect(() => validateValueType('Sample String', 'testPath')).not.toThrow();
});

test('should accept number type', () => {
expect(() => validateValueType(123, 'testPath')).not.toThrow();
});

test('should accept boolean type', () => {
expect(() => validateValueType(true, 'testPath')).not.toThrow();
});

test('should accept null type', () => {
expect(() => validateValueType(null, 'testPath')).not.toThrow();
});

test('should accept array of supported types', () => {
expect(() => validateValueType([1, 'a', true], 'testPath')).not.toThrow();
});

test('should reject nested arrays with unsupported types', () => {
expect(() =>
validateValueType([1, ['a', { key: 'value' }], true], 'testPath'),
).toThrow();
});

test('should reject object type', () => {
expect(() => validateValueType({ key: 'value' }, 'testPath')).toThrow();
});

test('should reject unsupported type in array', () => {
expect(() =>
validateValueType([1, 2, { key: 'value' }], 'testPath'),
).toThrow();
});

test('should reject array inside arrays', () => {
expect(() =>
validateValueType([1, 2, { key: 'value' }, [1, [4, 5, 6]]], 'testPath'),
).toThrow();
});
});

describe('validateValueType - successful performance tests', () => {
const largeDatasetSize = 500000;

function generateLargeDataset(type) {
switch (type) {
case 'string':
return new Array(largeDatasetSize).fill('testString');
case 'number':
return Array.from({ length: largeDatasetSize }, (_, i) => i);
case 'boolean':
return new Array(largeDatasetSize).fill(true);
case 'array':
return new Array(largeDatasetSize).fill([1, 2, 3]);
case 'undefined':
return new Array(largeDatasetSize).fill(undefined);
default:
throw new Error(`Unsupported type: ${type}`);
}
}

['string', 'number', 'boolean', 'array', 'undefined'].forEach((type) => {
test(`should handle large dataset of type ${type} efficiently`, () => {
const largeDataset = generateLargeDataset(type);

const start = performance.now();
largeDataset.forEach((item) =>
validateValueType(item, `testPath-${type}`),
);
const end = performance.now();

const executionTime = end - start;

console.log(`Execution time for ${type} dataset: ${executionTime} ms`);

const acceptableTimeLimit = 1000;
expect(executionTime).toBeLessThan(acceptableTimeLimit);
});
});
});

describe('validateValueType - error Scenarios', () => {
const largeDatasetSize = 500000;

function generateErrorDataset() {
const data: any[] = [];
for (let i = 0; i < largeDatasetSize; i++) {
if (i % 50000 === 0) {
data.push({ key: `value-${i}` });
} else {
data.push(i % 2 === 0 ? `string-${i}` : i);
}
}
return data;
}

test(`should throw error for large dataset with unsupported types`, () => {
const errorDataset = generateErrorDataset();
let executionTime = 0;

const start = performance.now();
try {
errorDataset.forEach((item) =>
validateValueType(item, `testPath-errorCase`),
);
const end = performance.now();
executionTime = end - start;
} catch (error) {
const end = performance.now();
executionTime = end - start;

console.log(`Execution time until error: ${executionTime} ms`);
}

expect(executionTime).toBeGreaterThan(0);

const acceptableErrorDetectionTime = 1000;
expect(executionTime).toBeLessThan(acceptableErrorDetectionTime);

expect(() => {
errorDataset.forEach((item) =>
validateValueType(item, `testPath-errorCase`),
);
}).toThrow();
});
});
76 changes: 64 additions & 12 deletions packages/integration-sdk-core/src/data/createIntegrationEntity.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import { parseTimePropertyValue } from './converters';
import { validateRawData } from './rawData';
import { assignTags, ResourceTagList, ResourceTagMap } from './tagging';

const SUPPORTED_TYPES = ['string', 'number', 'boolean'];
const SUPPORTED_TYPES = ['string', 'number', 'boolean', 'array', 'undefined'];
i5o marked this conversation as resolved.
Show resolved Hide resolved

/**
* Properties to be assigned to a generated entity which are declared in code
Expand Down Expand Up @@ -125,7 +125,7 @@ function generateEntity({
const _class = Array.isArray(assign._class) ? assign._class : [assign._class];

const entity: GeneratedEntity = {
...(source ? whitelistedProviderData(source, _class) : {}),
...(source ? allowedProviderData(source, _class) : {}),
...assign,
_class,
_rawData,
Expand Down Expand Up @@ -180,27 +180,79 @@ function generateEntity({
}

/**
* Answers a form of the provider data with only the properties supported by the
* Validates that the provided value conforms to the supported types.
* If the value is an array, this function is called recursively on each element
* to ensure nested arrays are also validated. This function throws an error
* if it encounters a value type that is not supported.
*
* @param value The value to be validated. It can be a single value or an array.
* For arrays, each element is validated recursively.
* @param path The path to the current property being validated, used for error messaging.
* This is updated with each recursive call to reflect the current context.
* @param currentValueDepth The current depth of recursion for array validation. It starts at 1
* and increments with each recursive call into deeper array levels.
* This parameter is used to prevent validation of nested arrays beyond the first level.
*/
export function validateValueType(
value: any,
path: string,
currentValueDepth: number = 1,
): void {
// Explicitly allow null values
if (value === null) {
return;
}

if (Array.isArray(value)) {
// If the depth is > 1 then we won't allow arrays inside arrays.
if (currentValueDepth > 1) {
throw new IntegrationError({
code: 'UNSUPPORTED_TYPE',
message: `Unsupported type found at "${path}": Nested arrays are not supported.`,
});
}

// If the value is an array, validate each element
value.forEach((item, index) => {
validateValueType(item, `${path}[${index}]`, ++currentValueDepth);
});
} else {
// For non-array values, check if the type is supported
const valueType = typeof value;
if (!SUPPORTED_TYPES.includes(valueType)) {
throw new IntegrationError({
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How can we keep steps from failing on unsupported data? This is a step towards strictness that'll have any overall impact on job statuses and the ingestion team.
How else could we improve the overall health of jobs and data?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should open a bigger discussion for this but this should be a MAJOR, we have some ideas in the AWS Pod as to how to proceed

code: 'UNSUPPORTED_TYPE',
message: `Unsupported type found at "${path}": ${valueType}`,
});
}
}
}

/**
* Filters the provider data to include only the properties allowed by the
* data model schema.
*
* @param source resource data from the resource provider/external system
* @param _class entity `_class: string[]` value
* @param {ProviderSourceData} source - The resource data from the provider or external system.
* @param {string[]} _class - An array representing the entity's class for schema validation.
* @returns {Omit<ProviderSourceData, 'tags'>} - The filtered provider data with only allowed properties.
*/
function whitelistedProviderData(
function allowedProviderData(
source: ProviderSourceData,
_class: string[],
): Omit<ProviderSourceData, 'tags'> {
const whitelistedProviderData: ProviderSourceData = {};
const filteredProviderData: ProviderSourceData = {};
const schemaProperties = schemaWhitelistedPropertyNames(_class);

for (const [key, value] of Object.entries(source)) {
// Ensure the property is part of the schema and not null
if (value != null && schemaProperties.includes(key)) {
const valueType = Array.isArray(value) ? typeof value[0] : typeof value;
if (SUPPORTED_TYPES.includes(valueType)) {
whitelistedProviderData[key] = value;
}
if (key != 'tags') validateValueType(value, key);

// If validation passes, assign the value to the filtered data
filteredProviderData[key] = value;
}
}
return whitelistedProviderData;
return filteredProviderData;
}

/**
Expand Down