-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
concept-api: Add migration that adds concept subjects as tags
- Loading branch information
Showing
3 changed files
with
169 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
88 changes: 88 additions & 0 deletions
88
...c/main/scala/no/ndla/conceptapi/db/migrationwithdependencies/V23__SubjectNameAsTags.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,88 @@ | ||
/* | ||
* Part of NDLA concept-api | ||
* Copyright (C) 2025 NDLA | ||
* | ||
* See LICENSE | ||
* | ||
*/ | ||
|
||
package no.ndla.conceptapi.db.migrationwithdependencies | ||
|
||
import io.circe.{Json, parser} | ||
import no.ndla.common.CirceUtil | ||
import no.ndla.conceptapi.ConceptApiProperties | ||
import no.ndla.database.DocumentMigration | ||
import sttp.client3.quick.* | ||
import io.circe.generic.auto.* | ||
import io.circe.syntax.EncoderOps | ||
|
||
/** A subject name in one specific language. */
case class TaxonomyTranslation(
    name: String,
    language: String
)

/** A subject as decoded from the taxonomy response: its id, its default name and its translated names. */
case class TaxonomySubject(
    id: String,
    name: String,
    translations: List[TaxonomyTranslation]
)

/** Minimal view of a language-specific entry, used when only the entry's language code is needed. */
case class LanguageObject(
    language: String
)

/** The tags of a concept for a single language. */
case class TagsObject(
    tags: List[String],
    language: String
)
|
||
/** Document migration that adds the names of a concept's subjects to the concept's tags.
  *
  * For every id in the document's `subjectIds`, the subject name is appended to the tag list of every language the
  * concept uses. The translated name is used when the subject has a translation for that language, otherwise the
  * subject's default name is used.
  *
  * @param properties
  *   Supplies `TaxonomyUrl`, which is used when the subjects have to be fetched.
  * @param prefetchedSubjects
  *   When defined, these subjects are used as-is and no HTTP request is made.
  */
class V23__SubjectNameAsTags(
    properties: ConceptApiProperties,
    prefetchedSubjects: Option[List[TaxonomySubject]] = None
) extends DocumentMigration {
  override val columnName: String = "document"
  override val tableName: String  = "conceptdata"

  /** Builds a `language -> name` lookup for a subject.
    *
    * Languages without a translation resolve to the subject's default name.
    */
  def toMap(subject: TaxonomySubject): Map[String, String] =
    subject.translations
      .map(t => t.language -> t.name)
      .toMap
      .withDefaultValue(subject.name)

  /** All subjects: the prefetched list if one was supplied, otherwise the result of a taxonomy request. */
  lazy val subjects: List[TaxonomySubject] = prefetchedSubjects match {
    case Some(value) => value
    case None        =>
      val request  = quickRequest.get(uri"${properties.TaxonomyUrl}/v1/nodes?nodeType=SUBJECT")
      val response = simpleHttpClient.send(request)
      CirceUtil.unsafeParseAs[List[TaxonomySubject]](response.body)
  }

  /** Subject id mapped to that subject's `language -> name` lookup (see [[toMap]]). */
  lazy val subjectIdToTranslationsMap: Map[String, Map[String, String]] =
    subjects.map(subject => subject.id -> toMap(subject)).toMap

  /** Looks up the name translations of a subject.
    *
    * Fails with a `NoSuchElementException` (the same exception type a plain map lookup would raise) but with a message
    * that identifies the offending subject, so a failing migration can be diagnosed.
    */
  private def translationsFor(subjectId: String): Map[String, String] =
    subjectIdToTranslationsMap.getOrElse(
      subjectId,
      throw new NoSuchElementException(s"Subject '$subjectId' was not found among the known taxonomy subjects")
    )

  /** Languages of the entries in the array stored under `fieldName`; empty when the field is missing or null.
    *
    * Throws if the field is present but cannot be decoded as a list of objects with a `language` field.
    */
  private def getLanguagesOfField(fieldName: String, json: Json): List[String] =
    json.hcursor
      .downField(fieldName)
      .as[Option[List[LanguageObject]]]
      .toTry
      .get
      .getOrElse(List.empty)
      .map(_.language)

  /** Distinct languages used by the language-specific fields of the document, in order of first appearance. */
  def getLanguages(json: Json): List[String] = {
    val fields = List("title", "content", "tags", "visualElement", "metaImage")
    fields.flatMap(field => getLanguagesOfField(field, json)).distinct
  }

  /** The document's current tags; empty when the `tags` field is missing or null. */
  def getTags(json: Json): List[TagsObject] =
    json.hcursor.downField("tags").as[Option[List[TagsObject]]].toTry.get.getOrElse(List.empty)

  /** Appends `tag` to the tags of `language`, starting from an empty tag list if the language has none yet.
    *
    * A tag that is already present is not appended a second time.
    */
  private def withTag(tags: List[TagsObject], language: String, tag: String): TagsObject = {
    val current = tags.find(_.language == language).getOrElse(TagsObject(List.empty, language))
    if (current.tags.contains(tag)) current
    else current.copy(tags = current.tags :+ tag)
  }

  /** Returns the document with subject names added as tags.
    *
    * Documents without any `subjectIds` are returned unchanged. Otherwise the `tags` field is rewritten (or created if
    * the document did not have one) with one entry per language of the concept.
    *
    * Throws if the document is not valid JSON, if a relevant field has an unexpected shape, or if a subject id is
    * unknown.
    */
  override def convertColumn(document: String): String = {
    val oldDocument  = parser.parse(document).toTry.get
    val languages    = getLanguages(oldDocument)
    val existingTags = getTags(oldDocument)
    oldDocument.hcursor.downField("subjectIds").as[Option[List[String]]].toTry.get match {
      case Some(subjectIds) if subjectIds.nonEmpty =>
        val newTags = subjectIds.foldLeft(existingTags) { (accTags, subjectId) =>
          val translations = translationsFor(subjectId)
          languages.map(language => withTag(accTags, language, translations(language)))
        }

        // Always write the tags: a document that has subjects but no `tags` key must still receive them.
        oldDocument.mapObject(_.remove("tags").add("tags", newTags.asJson)).noSpaces

      case _ => document
    }
  }
}
77 changes: 77 additions & 0 deletions
77
...st/scala/no/ndla/conceptapi/db/migrationwithdependencies/V23__SubjectNameAsTagsTest.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
/* | ||
* Part of NDLA concept-api | ||
* Copyright (C) 2025 NDLA | ||
* | ||
* See LICENSE | ||
* | ||
*/ | ||
|
||
package no.ndla.conceptapi.db.migrationwithdependencies | ||
|
||
import io.circe.syntax.EncoderOps | ||
import no.ndla.common.CirceUtil | ||
import no.ndla.common.model.domain.{Tag, Title} | ||
import no.ndla.common.model.domain.concept.{Concept, ConceptContent, ConceptMetaImage, VisualElement} | ||
import no.ndla.conceptapi.{TestData, TestEnvironment, UnitSuite} | ||
|
||
/** Tests for [[V23__SubjectNameAsTags]], run against a fixed list of prefetched subjects. */
class V23__SubjectNameAsTagsTest extends UnitSuite with TestEnvironment {
  // Subject with a translation for every language used by the tests below.
  private val naturfag = TaxonomySubject(
    "urn:subject:1",
    "Naturfag",
    List(
      TaxonomyTranslation("Naturfag", "nb"),
      TaxonomyTranslation("Naturfagi", "nn"),
      TaxonomyTranslation("Science", "en"),
      TaxonomyTranslation("科学", "zh"),
      TaxonomyTranslation("Luonddufágga", "sma")
    )
  )

  // Subject lacking "zh" and "sma" translations, so those languages use the default name.
  private val matte = TaxonomySubject(
    "urn:subject:2",
    "Matte",
    List(
      TaxonomyTranslation("Matematik", "nb"),
      TaxonomyTranslation("Matematiki", "nn"),
      TaxonomyTranslation("Math", "en")
    )
  )

  val fakeSubjects: List[TaxonomySubject] = List(naturfag, matte)

  val migration = new V23__SubjectNameAsTags(props, prefetchedSubjects = Some(fakeSubjects))

  test("That we can get languages from a concept json string") {
    val conceptJson = TestData.domainConcept
      .copy(
        title = List(Title("Tittel", "nb")),
        content = List(ConceptContent("Innhold", "sma")),
        tags = List(Tag(List("tag1", "tag2", "tag3"), "nn")),
        visualElement = List(VisualElement("zzz", "en")),
        metaImage = List(ConceptMetaImage("123", "zzz", "zh"))
      )
      .asJson

    migration.getLanguages(conceptJson) should be(List("nb", "sma", "nn", "en", "zh"))
  }

  test("That adding tags works as expected") {
    val initialTags = List(
      Tag(List("nb"), "nb"),
      Tag(List("nn"), "nn"),
      Tag(List("en"), "en"),
      Tag(List("zh"), "zh")
    )
    val document = TestData.domainConcept
      .copy(
        title = List(Title("Tittel", "nb")),
        content = List(ConceptContent("Innhold", "sma")),
        tags = initialTags,
        visualElement = List(VisualElement("zzz", "en")),
        metaImage = List(ConceptMetaImage("123", "zzz", "zh")),
        subjectIds = Set("urn:subject:1", "urn:subject:2")
      )
      .asJson
      .noSpaces

    val expectedTags = List(
      Tag(List("nb", "Naturfag", "Matematik"), "nb"),
      Tag(List("nn", "Naturfagi", "Matematiki"), "nn"),
      Tag(List("en", "Science", "Math"), "en"),
      Tag(List("zh", "科学", "Matte"), "zh"),
      Tag(List("Luonddufágga", "Matte"), "sma")
    )

    val migratedTags = CirceUtil.unsafeParseAs[Concept](migration.convertColumn(document)).tags

    // Compare independently of the order in which the languages were emitted.
    migratedTags.sortBy(_.language) should be(expectedTags.sortBy(_.language))
  }
}