Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: update covariance metric logic #39

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -56,24 +56,20 @@ object MultiColumnMetrics {
protected def copyWithError(status: CalculatorStatus, msg: String, failInc: Long = 1): MetricCalculator =
this.copy(failCount = failCount + failInc, status = status, failMsg = msg)

override def result(): Map[String, (Double, Option[String])] = if (failCount == 0 && n > 0) {
override def result(): Map[String, (Double, Option[String])] = {
val coMomentAdj = if (n > 0) coMoment else Double.NaN // return NaN from empty calculator state
val covariance = if (n > 0) coMoment / n else Double.NaN
val covarianceBessel = if (n > 1) coMoment / (n - 1) else Double.NaN
Map(
MetricName.CoMoment.entryName -> (coMoment, None),
MetricName.Covariance.entryName -> (coMoment / n, None),
MetricName.CovarianceBessel.entryName -> (coMoment / (n - 1), None)
)
} else {
val msg = Some("Metric calculation failed due to some of the processed values cannot be cast to number.")
Map(
MetricName.CoMoment.entryName -> (Double.NaN, msg),
MetricName.Covariance.entryName -> (Double.NaN, msg),
MetricName.CovarianceBessel.entryName -> (Double.NaN, msg)
MetricName.CoMoment.entryName -> (coMomentAdj, None),
MetricName.Covariance.entryName -> (covariance, None),
MetricName.CovarianceBessel.entryName -> (covarianceBessel, None)
)
}

override def merge(m2: MetricCalculator): MetricCalculator = {
val that: CovarianceMetricCalculator = m2.asInstanceOf[CovarianceMetricCalculator]
CovarianceMetricCalculator(
if (this.n == 0) that else CovarianceMetricCalculator(
(this.lMean * this.n + that.lMean * that.n) / (this.n + that.n),
(this.rMean * this.n + that.rMean * that.n) / (this.n + that.n),
this.coMoment + that.coMoment +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,11 +112,17 @@ object Serialization {
* Sequence retain order in which were defined.
* @return Sequence of tuples: fieldName -> fieldValue
*/
private def fieldsValues: Seq[(String, Any)] =
value.productIterator.toSeq.zip(getFields).map{
case (_, n) if dateReplacement.contains(n) => n -> dateReplacement(n)
case (v, n) => n -> v
}
private def fieldsValues: Seq[(String, Any)] = {
val fieldsSet = getFields.toSet
value.getClass.getDeclaredFields
.filter(f => fieldsSet.contains(f.getName)) // retain only fields that are case class fields.
.foldLeft(Seq.empty[(String, Any)]){ (s, f) =>
f.setAccessible(true)
val n = f.getName
val v = dateReplacement.getOrElse(n, f.get(value).asInstanceOf[Any])
s :+ (n -> v)
}
}

/**
* Serialize result entity into unified JSON string.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package ru.raiffeisen.checkita.utils

import enumeratum.EnumEntry
import eu.timepit.refined.api.Refined
import org.apache.commons.io.FileUtils.openInputStream
import org.apache.commons.io.IOUtils.toInputStream
Expand Down Expand Up @@ -50,12 +51,19 @@ object Common {
def getFieldsMap[T <: Product with Serializable : TypeTag](obj: T): Map[String, Any] = {
val fields = typeOf[T].members.collect {
case m: MethodSymbol if m.isCaseAccessor => m.name.toString
}
obj.productIterator.toSeq.zip(fields).map{
case (v: Refined[_, _], k) => k -> v.value
case (v: DateFormat, k) => k -> v.pattern
case (v, k) => k -> v
}.toMap
}.toSet
obj.getClass.getDeclaredFields
.filter(f => fields.contains(f.getName)) // retain only fields that are case class fields.
.foldLeft(Map.empty[String, Any]){ (m, f) =>
f.setAccessible(true)
val value = f.get(obj).asInstanceOf[Any] match {
case v: Refined[_, _] => v.value
case v: DateFormat => v.pattern
case v: EnumEntry => v.toString
case v => v
}
m + (f.getName -> value)
}
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,16 +52,28 @@ class MultiColumnMetricsSpec extends AnyWordSpec with Matchers {
t.getFailCounter shouldEqual 0
}
}

"return NaN values when sequence contains non-number values" in {
val metricResults = Seq(testValues.head, testValues(3)).map(s => s.foldLeft[MetricCalculator](

"return correct result and fail counts when sequence contains non-number values" in {
val results = (545.5746740000001, 181.8582246666667, 272.78733700000004)
val metricCalc = testValues(3).foldLeft[MetricCalculator](
new CovarianceMetricCalculator())((m, v) => m.increment(v))
val metricResult = metricCalc.result()
val metricFailCnt = metricCalc.getFailCounter

metricResult(MetricName.CoMoment.entryName)._1 shouldEqual results._1
metricResult(MetricName.Covariance.entryName)._1 shouldEqual results._2
metricResult(MetricName.CovarianceBessel.entryName)._1 shouldEqual results._3

metricFailCnt shouldEqual 3
}

"return NaN values when sequence do not contain numeric values" in {
val metricResult = testValues.head.foldLeft[MetricCalculator](
new CovarianceMetricCalculator())((m, v) => m.increment(v)).result()
)
metricResults.foreach { v =>
v(MetricName.CoMoment.entryName)._1.isNaN shouldEqual true
v(MetricName.Covariance.entryName)._1.isNaN shouldEqual true
v(MetricName.CovarianceBessel.entryName)._1.isNaN shouldEqual true
}

metricResult(MetricName.CoMoment.entryName)._1.isNaN shouldEqual true
metricResult(MetricName.Covariance.entryName)._1.isNaN shouldEqual true
metricResult(MetricName.CovarianceBessel.entryName)._1.isNaN shouldEqual true
}

"return fail status and correct fail counts when sequence contains non-number values" in {
Expand Down
Loading