From 1a89779a17aa4e73a6297cfb85717304cb276fbf Mon Sep 17 00:00:00 2001 From: Ian Streeter Date: Wed, 31 Jul 2024 14:18:33 +0100 Subject: [PATCH] [WIP] Demo of maxJsonDepth when validating JSON --- .../client/IgluCirceClient.scala | 25 +++++++++-- .../client/validator/CirceValidator.scala | 41 ++++++++++++------- .../io/circe/jackson/snowplow/package.scala | 8 ++-- 3 files changed, 53 insertions(+), 21 deletions(-) diff --git a/modules/core/src/main/scala/com.snowplowanalytics.iglu/client/IgluCirceClient.scala b/modules/core/src/main/scala/com.snowplowanalytics.iglu/client/IgluCirceClient.scala index 3d39cacf..207bb1be 100644 --- a/modules/core/src/main/scala/com.snowplowanalytics.iglu/client/IgluCirceClient.scala +++ b/modules/core/src/main/scala/com.snowplowanalytics.iglu/client/IgluCirceClient.scala @@ -36,7 +36,8 @@ import io.circe.{DecodingFailure, Json} */ final class IgluCirceClient[F[_]] private ( resolver: Resolver[F], - schemaEvaluationCache: SchemaEvaluationCache[F] + schemaEvaluationCache: SchemaEvaluationCache[F], + maxJsonDepth: Int ) { def check( instance: SelfDescribingData[Json] @@ -50,7 +51,8 @@ final class IgluCirceClient[F[_]] private ( resolver.lookupSchemaResult(instance.schema, resolveSupersedingSchema = true) ) validation = - CirceValidator.WithCaching.validate(schemaEvaluationCache)(instance.data, resolverResult) + CirceValidator.WithCaching + .validate(schemaEvaluationCache)(instance.data, resolverResult, maxJsonDepth) _ <- EitherT(validation).leftMap(e => e.toClientError(resolverResult.value.supersededBy.map(_.asString)) ) @@ -61,21 +63,36 @@ final class IgluCirceClient[F[_]] private ( object IgluCirceClient { + @deprecated("Use `parseDefault(json, maxJsonDepth)`", "3.2.0") def parseDefault[F[_]: Monad: CreateResolverCache: InitValidatorCache]( json: Json + ): EitherT[F, DecodingFailure, IgluCirceClient[F]] = + parseDefault(json, Int.MaxValue) + + def parseDefault[F[_]: Monad: CreateResolverCache: InitValidatorCache]( + json: Json, + maxJsonDepth: Int ): EitherT[F, DecodingFailure, IgluCirceClient[F]] = for { config <- EitherT.fromEither[F](Resolver.parseConfig(json)) resolver <- Resolver.fromConfig[F](config) - client <- EitherT.liftF(fromResolver(resolver, config.cacheSize)) + client <- EitherT.liftF(fromResolver(resolver, config.cacheSize, maxJsonDepth)) } yield client + @deprecated("Use `fromResolver(resolver, cacheSize, maxJsonDepth)`", "3.2.0") def fromResolver[F[_]: Monad: InitValidatorCache]( resolver: Resolver[F], cacheSize: Int + ): F[IgluCirceClient[F]] = + fromResolver(resolver, cacheSize, Int.MaxValue) + + def fromResolver[F[_]: Monad: InitValidatorCache]( + resolver: Resolver[F], + cacheSize: Int, + maxJsonDepth: Int ): F[IgluCirceClient[F]] = { schemaEvaluationCache[F](cacheSize).map { cache => - new IgluCirceClient(resolver, cache) + new IgluCirceClient(resolver, cache, maxJsonDepth) } } diff --git a/modules/core/src/main/scala/com.snowplowanalytics.iglu/client/validator/CirceValidator.scala b/modules/core/src/main/scala/com.snowplowanalytics.iglu/client/validator/CirceValidator.scala index be19e390..1d959972 100644 --- a/modules/core/src/main/scala/com.snowplowanalytics.iglu/client/validator/CirceValidator.scala +++ b/modules/core/src/main/scala/com.snowplowanalytics.iglu/client/validator/CirceValidator.scala @@ -88,25 +88,30 @@ object CirceValidator extends Validator[Json] { V4SchemaInstance.getSchema(new ObjectMapper().readTree(MetaSchemas.JsonSchemaV4Text)) def validate(data: Json, schema: Json): Either[ValidatorError, Unit] = { - val jacksonJson = circeToJackson(schema) + val jacksonJson = circeToJackson(schema, Int.MaxValue) evaluateSchema(jacksonJson) .flatMap { schema => - validateOnReadySchema(schema, data).leftMap(ValidatorError.InvalidData.apply) + validateOnReadySchema(schema, data, Int.MaxValue).leftMap(ValidatorError.InvalidData.apply) } } - def checkSchema(schema: Json): List[ValidatorError.SchemaIssue] = { - val jacksonJson = circeToJackson(schema) + @deprecated("Use `checkSchema(schema, maxJsonDepth)`", "3.2.0") + def checkSchema(schema: Json): List[ValidatorError.SchemaIssue] = + checkSchema(schema, Int.MaxValue) + + def checkSchema(schema: Json, maxJsonDepth: Int): List[ValidatorError.SchemaIssue] = { + val jacksonJson = circeToJackson(schema, maxJsonDepth) validateSchemaAgainstV4(jacksonJson) } /** Validate instance against schema and return same instance */ private def validateOnReadySchema( schema: JsonSchema, - instance: Json + instance: Json, + maxJsonDepth: Int ): EitherNel[ValidatorReport, Unit] = { val messages = schema - .validate(circeToJackson(instance)) + .validate(circeToJackson(instance, maxJsonDepth)) .asScala .toList .map(fromValidationMessage) @@ -154,11 +159,15 @@ object CirceValidator extends Validator[Json] { def validate[F[_]: Monad]( schemaEvaluationCache: SchemaEvaluationCache[F] - )(data: Json, schema: SchemaLookupResult): F[Either[ValidatorError, Unit]] = { - getFromCacheOrEvaluate(schemaEvaluationCache)(schema) + )( + data: Json, + schema: SchemaLookupResult, + maxJsonDepth: Int + ): F[Either[ValidatorError, Unit]] = { + getFromCacheOrEvaluate(schemaEvaluationCache)(schema, maxJsonDepth) .map { _.flatMap { jsonschema => - validateOnReadySchema(jsonschema, data) + validateOnReadySchema(jsonschema, data, maxJsonDepth) .leftMap(ValidatorError.InvalidData.apply) } } @@ -166,26 +175,30 @@ object CirceValidator extends Validator[Json] { private def getFromCacheOrEvaluate[F[_]: Monad]( evaluationCache: SchemaEvaluationCache[F] - )(result: SchemaLookupResult): F[Either[ValidatorError.InvalidSchema, JsonSchema]] = { + )( + result: SchemaLookupResult, + maxJsonDepth: Int + ): F[Either[ValidatorError.InvalidSchema, JsonSchema]] = { result match { case ResolverResult.Cached(key, SchemaItem(schema, _), timestamp) => evaluationCache.get((key, timestamp)).flatMap { case Some(alreadyEvaluatedSchema) => alreadyEvaluatedSchema.pure[F] case None => - provideNewJsonSchema(schema) + provideNewJsonSchema(schema, maxJsonDepth) .pure[F] .flatTap(result => evaluationCache.put((key, timestamp), result)) } case ResolverResult.NotCached(SchemaItem(schema, _)) => - provideNewJsonSchema(schema).pure[F] + provideNewJsonSchema(schema, maxJsonDepth).pure[F] } } private def provideNewJsonSchema( - schema: Json + schema: Json, + maxJsonDepth: Int ): Either[ValidatorError.InvalidSchema, JsonSchema] = { - val schemaAsNode = circeToJackson(schema) + val schemaAsNode = circeToJackson(schema, maxJsonDepth) for { _ <- validateSchema(schemaAsNode) evaluated <- evaluateSchema(schemaAsNode) diff --git a/modules/core/src/main/scala/io/circe/jackson/snowplow/package.scala b/modules/core/src/main/scala/io/circe/jackson/snowplow/package.scala index 7ca7a2dd..acd37352 100644 --- a/modules/core/src/main/scala/io/circe/jackson/snowplow/package.scala +++ b/modules/core/src/main/scala/io/circe/jackson/snowplow/package.scala @@ -34,7 +34,7 @@ package object snowplow { * @param json instance of circe's Json * @return converted JsonNode */ - final def circeToJackson(json: Json): JsonNode = + final def circeToJackson(json: Json, maxDepth: Int): JsonNode = json.fold( NullNode.instance, BooleanNode.valueOf(_), @@ -71,12 +71,14 @@ package object snowplow { } }, s => TextNode.valueOf(s), - array => JsonNodeFactory.instance.arrayNode.addAll(array.map(circeToJackson).asJava), + array => + JsonNodeFactory.instance.arrayNode + .addAll(array.map(circeToJackson(_, maxDepth - 1)).asJava), obj => objectNodeSetAll( JsonNodeFactory.instance.objectNode, obj.toMap.map { case (k, v) => - (k, circeToJackson(v)) + (k, circeToJackson(v, maxDepth - 1)) }.asJava ) )