Skip to content

Commit

Permalink
concept-api: Add migration that adds concept subjects as tags
Browse files Browse the repository at this point in the history
  • Loading branch information
jnatten committed Feb 5, 2025
1 parent cfab8c1 commit d476579
Show file tree
Hide file tree
Showing 3 changed files with 169 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import no.ndla.network.NdlaClient
import no.ndla.search.{BaseIndexService, Elastic4sClient}
import no.ndla.common.Clock
import no.ndla.common.configuration.BaseComponentRegistry
import no.ndla.conceptapi.db.migrationwithdependencies.V23__SubjectNameAsTags
import no.ndla.database.{DBMigrator, DataSource}
import no.ndla.network.tapir.TapirApplication

Expand Down Expand Up @@ -60,7 +61,9 @@ class ComponentRegistry(properties: ConceptApiProperties)
with SwaggerDocControllerConfig
with ConceptControllerHelpers {
override val props: ConceptApiProperties = properties
override val migrator: DBMigrator = DBMigrator()
override val migrator: DBMigrator = DBMigrator(
new V23__SubjectNameAsTags(props)
)

override val dataSource: HikariDataSource = DataSource.getHikariDataSource
DataSource.connectToDatabase()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
/*
* Part of NDLA concept-api
* Copyright (C) 2025 NDLA
*
* See LICENSE
*
*/

package no.ndla.conceptapi.db.migrationwithdependencies

import io.circe.{Json, parser}
import no.ndla.common.CirceUtil
import no.ndla.conceptapi.ConceptApiProperties
import no.ndla.database.DocumentMigration
import sttp.client3.quick.*
import io.circe.generic.auto.*
import io.circe.syntax.EncoderOps

// A translated name of a taxonomy subject for a single language.
case class TaxonomyTranslation(name: String, language: String)
// A subject node as decoded from the taxonomy `/v1/nodes?nodeType=SUBJECT` response:
// its id, its default name and the per-language translations of that name.
case class TaxonomySubject(id: String, name: String, translations: List[TaxonomyTranslation])
// Minimal view of a language-tagged concept field; only the `language` key is decoded.
case class LanguageObject(language: String)
// One entry of a concept's `tags` array: the tags that apply for a single language.
case class TagsObject(tags: List[String], language: String)

/** Migration that copies the names of a concept's subjects into the concept's tags.
  *
  * For every id in the document's `subjectIds` the subject name is appended to the tag list of
  * every language the concept uses. The translated name is used when the subject has a translation
  * for that language, otherwise the subject's default name.
  *
  * @param properties
  *   Supplies `TaxonomyUrl`, the base url the subjects are fetched from.
  * @param prefetchedSubjects
  *   When defined, these subjects are used instead of calling taxonomy (used by the tests).
  */
class V23__SubjectNameAsTags(
    properties: ConceptApiProperties,
    prefetchedSubjects: Option[List[TaxonomySubject]] = None
) extends DocumentMigration {
  override val columnName: String = "document"
  override val tableName: String  = "conceptdata"

  /** Builds a `language -> name` lookup for `subject`.
    *
    * The returned map falls back to the subject's default name for languages without a translation.
    */
  def toMap(subject: TaxonomySubject): Map[String, String] =
    subject.translations
      .map(t => t.language -> t.name)
      .toMap
      .withDefaultValue(subject.name)

  /** Fetches all subject nodes from taxonomy.
    *
    * @throws RuntimeException
    *   if taxonomy answers with a non-successful status code, so that an error payload is never
    *   handed to the json parser.
    */
  private def fetchSubjects(): List[TaxonomySubject] = {
    val request  = quickRequest.get(uri"${properties.TaxonomyUrl}/v1/nodes?nodeType=SUBJECT")
    val response = simpleHttpClient.send(request)
    if (!response.code.isSuccess) {
      throw new RuntimeException(
        s"Failed to fetch subjects from taxonomy, got status code ${response.code}"
      )
    }
    CirceUtil.unsafeParseAs[List[TaxonomySubject]](response.body)
  }

  /** The subjects used by the migration; lazy so taxonomy is only contacted when they are needed. */
  lazy val subjects: List[TaxonomySubject] = prefetchedSubjects.getOrElse(fetchSubjects())

  /** Subject id -> (language -> subject name), see [[toMap]] for the fallback behaviour. */
  lazy val subjectIdToTranslationsMap: Map[String, Map[String, String]] =
    subjects.map(subject => subject.id -> toMap(subject)).toMap

  /** Languages found in the array stored under `fieldName`; empty when the field is missing or null. */
  private def getLanguagesOfField(fieldName: String, json: Json): List[String] =
    json.hcursor
      .downField(fieldName)
      .as[Option[List[LanguageObject]]]
      .toTry
      .get
      .getOrElse(List.empty)
      .map(_.language)

  /** All distinct languages used by the language-tagged fields of the concept, in order of first
    * appearance.
    */
  def getLanguages(json: Json): List[String] = {
    val fields = List("title", "content", "tags", "visualElement", "metaImage")
    fields.flatMap(field => getLanguagesOfField(field, json)).distinct
  }

  /** The existing `tags` entries of the concept; empty when the field is missing or null. */
  def getTags(json: Json): List[TagsObject] =
    json.hcursor.downField("tags").as[Option[List[TagsObject]]].toTry.get.getOrElse(List.empty)

  /** Adds the name of one subject to the tags of every language in `languages`.
    *
    * A language without an existing entry gets a new one, and a name that is already present is
    * not added a second time.
    */
  private def appendSubjectName(
      tags: List[TagsObject],
      languages: List[String],
      translations: Map[String, String]
  ): List[TagsObject] =
    languages.map { lang =>
      val subjectName = translations(lang)
      val current     = tags.find(_.language == lang).getOrElse(TagsObject(List.empty, lang))
      if (current.tags.contains(subjectName)) current
      else current.copy(tags = current.tags :+ subjectName)
    }

  /** Converts a single concept document.
    *
    * Documents without any `subjectIds` are returned untouched. Otherwise the `tags` field is
    * replaced (or created, if the document had none) with the existing tags extended by the
    * subject names.
    */
  override def convertColumn(document: String): String = {
    val oldDocument  = parser.parse(document).toTry.get
    val languages    = getLanguages(oldDocument)
    val existingTags = getTags(oldDocument)
    oldDocument.hcursor.downField("subjectIds").as[Option[List[String]]].toTry.get match {
      case Some(subjectIds) if subjectIds.nonEmpty =>
        val newTags = subjectIds.foldLeft(existingTags) { case (accTags, sid) =>
          // A subject id that taxonomy does not know about has no name to add. Skip it rather
          // than failing the entire migration with a NoSuchElementException.
          subjectIdToTranslationsMap
            .get(sid)
            .fold(accTags)(translations => appendSubjectName(accTags, languages, translations))
        }

        // Always write the tags, including for documents that had no `tags` key before;
        // otherwise the subject names of such documents would be silently lost.
        oldDocument.mapObject(_.remove("tags").add("tags", newTags.asJson)).noSpaces

      case _ => document
    }
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
/*
* Part of NDLA concept-api
* Copyright (C) 2025 NDLA
*
* See LICENSE
*
*/

package no.ndla.conceptapi.db.migrationwithdependencies

import io.circe.syntax.EncoderOps
import no.ndla.common.CirceUtil
import no.ndla.common.model.domain.{Tag, Title}
import no.ndla.common.model.domain.concept.{Concept, ConceptContent, ConceptMetaImage, VisualElement}
import no.ndla.conceptapi.{TestData, TestEnvironment, UnitSuite}

/** Unit tests for [[V23__SubjectNameAsTags]], run against an in-memory list of subjects so no
  * taxonomy request is made.
  */
class V23__SubjectNameAsTagsTest extends UnitSuite with TestEnvironment {
  // The first subject is translated to every language used by the tests. The second one lacks
  // "zh" and "sma" translations, so its default name ("Matte") is expected for those languages.
  val fakeSubjects: List[TaxonomySubject] = {
    val science = TaxonomySubject(
      id = "urn:subject:1",
      name = "Naturfag",
      translations = List(
        TaxonomyTranslation("Naturfag", "nb"),
        TaxonomyTranslation("Naturfagi", "nn"),
        TaxonomyTranslation("Science", "en"),
        TaxonomyTranslation("科学", "zh"),
        TaxonomyTranslation("Luonddufágga", "sma")
      )
    )
    val math = TaxonomySubject(
      id = "urn:subject:2",
      name = "Matte",
      translations = List(
        TaxonomyTranslation("Matematik", "nb"),
        TaxonomyTranslation("Matematiki", "nn"),
        TaxonomyTranslation("Math", "en")
      )
    )
    List(science, math)
  }

  val migration = new V23__SubjectNameAsTags(props, prefetchedSubjects = Some(fakeSubjects))

  test("That we can get languages from a concept json string") {
    // Each language-tagged field uses its own language, so all five should be reported,
    // in the order the fields are inspected.
    val conceptJson = TestData.domainConcept
      .copy(
        title = List(Title("Tittel", "nb")),
        content = List(ConceptContent("Innhold", "sma")),
        tags = List(Tag(List("tag1", "tag2", "tag3"), "nn")),
        visualElement = List(VisualElement("zzz", "en")),
        metaImage = List(ConceptMetaImage("123", "zzz", "zh"))
      )
      .asJson

    val expectedLanguages = List("nb", "sma", "nn", "en", "zh")
    migration.getLanguages(conceptJson) should be(expectedLanguages)
  }

  test("That adding tags works as expected") {
    val preexistingTags = List(
      Tag(List("nb"), "nb"),
      Tag(List("nn"), "nn"),
      Tag(List("en"), "en"),
      Tag(List("zh"), "zh")
    )
    val concept = TestData.domainConcept.copy(
      title = List(Title("Tittel", "nb")),
      content = List(ConceptContent("Innhold", "sma")),
      tags = preexistingTags,
      visualElement = List(VisualElement("zzz", "en")),
      metaImage = List(ConceptMetaImage("123", "zzz", "zh")),
      subjectIds = Set("urn:subject:1", "urn:subject:2")
    )

    // "sma" has no tags beforehand, so its entry is created by the migration.
    val expectedTags = List(
      Tag(List("nb", "Naturfag", "Matematik"), "nb"),
      Tag(List("nn", "Naturfagi", "Matematiki"), "nn"),
      Tag(List("en", "Science", "Math"), "en"),
      Tag(List("zh", "科学", "Matte"), "zh"),
      Tag(List("Luonddufágga", "Matte"), "sma")
    ).sortBy(_.language)

    val migrated   = migration.convertColumn(concept.asJson.noSpaces)
    val actualTags = CirceUtil.unsafeParseAs[Concept](migrated).tags.sortBy(_.language)

    actualTags should be(expectedTags)
  }
}

0 comments on commit d476579

Please sign in to comment.