Skip to content

Commit

Permalink
Merge pull request #560 from NDLANO/grep-laereplan-index
Browse files Browse the repository at this point in the history
search-api: Add grep læreplan to indexed search
jnatten authored Jan 2, 2025
2 parents 47d4e7d + 315a10d commit ddffdf4
Showing 12 changed files with 258 additions and 89 deletions.
Original file line number Diff line number Diff line change
@@ -30,17 +30,20 @@ trait GrepApiClient {
import props.GrepApiUrl
private val GrepApiEndpoint = s"$GrepApiUrl/kl06/v201906"

private def getAllKjerneelementer: Try[List[GrepElement]] =
get[List[GrepElement]](s"$GrepApiEndpoint/kjerneelementer-lk20/").map(_.distinct)
private def getAllKjerneelementer: Try[List[GrepKjerneelement]] =
get[List[GrepKjerneelement]](s"$GrepApiEndpoint/kjerneelementer-lk20/").map(_.distinct)

private def getAllKompetansemaal: Try[List[GrepElement]] =
get[List[GrepElement]](s"$GrepApiEndpoint/kompetansemaal-lk20/").map(_.distinct)
private def getAllKompetansemaal: Try[List[GrepKompetansemaal]] =
get[List[GrepKompetansemaal]](s"$GrepApiEndpoint/kompetansemaal-lk20/").map(_.distinct)

private def getAllKompetansemaalSett: Try[List[GrepElement]] =
get[List[GrepElement]](s"$GrepApiEndpoint/kompetansemaalsett-lk20/").map(_.distinct)
private def getAllKompetansemaalSett: Try[List[GrepKompetansemaalSett]] =
get[List[GrepKompetansemaalSett]](s"$GrepApiEndpoint/kompetansemaalsett-lk20/").map(_.distinct)

private def getAllTverrfagligeTemaer: Try[List[GrepElement]] =
get[List[GrepElement]](s"$GrepApiEndpoint/tverrfaglige-temaer-lk20/").map(_.distinct)
private def getAllTverrfagligeTemaer: Try[List[GrepTverrfagligTema]] =
get[List[GrepTverrfagligTema]](s"$GrepApiEndpoint/tverrfaglige-temaer-lk20/").map(_.distinct)

/** Requests the `laereplaner-lk20` listing below `GrepApiEndpoint` through `get` and drops duplicate entries. */
private def getAllLaereplaner: Try[List[GrepLaererplan]] = {
  val laereplanUrl = s"$GrepApiEndpoint/laereplaner-lk20/"
  get[List[GrepLaererplan]](laereplanUrl).map(laereplaner => laereplaner.distinct)
}

// NOTE: We add a helper so we don't have to provide `()` where this is used :^)
val getGrepBundle: () => Try[GrepBundle] = () => _getGrepBundle(())
@@ -65,17 +68,20 @@ trait GrepApiClient {
val kompetansemaal = tryToFuture(() => getAllKompetansemaal)
val kompetansemaalsett = tryToFuture(() => getAllKompetansemaalSett)
val tverrfagligeTemaer = tryToFuture(() => getAllTverrfagligeTemaer)
val laererplaner = tryToFuture(() => getAllLaereplaner)

val x = for {
kjerne <- kjerneelementer
kompetanse <- kompetansemaal
kompetansesett <- kompetansemaalsett
tverrfag <- tverrfagligeTemaer
laere <- laererplaner
} yield GrepBundle(
kjerneelementer = kjerne,
kompetansemaal = kompetanse,
kompetansemaalsett = kompetansesett,
tverrfagligeTemaer = tverrfag
tverrfagligeTemaer = tverrfag,
laereplaner = laere
)

Try(Await.result(x, Duration(300, "seconds"))) match {
Original file line number Diff line number Diff line change
@@ -16,7 +16,8 @@ import sttp.tapir.Schema.annotations.description
@description("Information about a single grep search result entry")
case class GrepResultDTO(
@description("The grep code") code: String,
@description("The greps title") title: TitleDTO
@description("The greps title") title: TitleDTO,
@description("The grep laereplan") laereplanCode: Option[String]
)

object GrepResultDTO {
Original file line number Diff line number Diff line change
@@ -8,13 +8,19 @@
package no.ndla.searchapi.model.grep

case class GrepBundle(
kjerneelementer: List[GrepElement],
kompetansemaal: List[GrepElement],
kjerneelementer: List[GrepKjerneelement],
kompetansemaal: List[GrepKompetansemaal],
kompetansemaalsett: List[GrepElement],
tverrfagligeTemaer: List[GrepElement]
tverrfagligeTemaer: List[GrepElement],
laereplaner: List[GrepLaererplan]
) {

val grepContext: List[GrepElement] = kjerneelementer ++ kompetansemaal ++ kompetansemaalsett ++ tverrfagligeTemaer
val grepContext: List[GrepElement] =
kjerneelementer ++
kompetansemaal ++
kompetansemaalsett ++
tverrfagligeTemaer ++
laereplaner

val grepContextByCode: Map[String, GrepElement] =
Map.from(grepContext.map(elem => elem.kode -> elem))
Original file line number Diff line number Diff line change
@@ -7,12 +7,56 @@

package no.ndla.searchapi.model.grep

import io.circe.generic.semiauto.{deriveDecoder, deriveEncoder}
import io.circe.{Decoder, Encoder}
import io.circe.generic.semiauto.{deriveDecoder, deriveEncoder}

/** Common shape of every Grep element model in this file: a code plus its titles.
  * The trait is sealed, so all implementations live in this file.
  */
sealed trait GrepElement {
// Code identifying the element; `GrepBundle.grepContextByCode` uses it as the map key.
val kode: String
// Titles as `GrepTitle` entries, each carrying a language (`spraak`) and a value (`verdi`).
val tittel: Seq[GrepTitle]
}

/** Mixin for Grep elements that carry a `tilhoerer_laereplan` reference.
  * `asSearchableGrep` matches on this trait to populate `laereplanCode` from the referenced `kode`.
  */
sealed trait BelongsToLaerePlan {
// Reference object whose `kode` is indexed as the element's `laereplanCode`.
val tilhoerer_laereplan: BelongsToObj
}

/** Element type decoded from the `kjerneelementer-lk20` endpoint (see `getAllKjerneelementer`).
  * Mixes in `BelongsToLaerePlan`, so it carries a `tilhoerer_laereplan` reference.
  */
case class GrepKjerneelement(kode: String, tittel: Seq[GrepTitle], tilhoerer_laereplan: BelongsToObj)
extends GrepElement
with BelongsToLaerePlan
object GrepKjerneelement {
// Circe codecs derived from the case class fields.
// NOTE(review): JSON keys presumably follow the field names (hence snake_case `tilhoerer_laereplan`) — confirm.
implicit val encoder: Encoder[GrepKjerneelement] = deriveEncoder
implicit val decoder: Decoder[GrepKjerneelement] = deriveDecoder
}

case class GrepElement(kode: String, tittel: Seq[GrepTitle])
/** Reference wrapper holding only the `kode` of the object it points to.
  * Used as the type of `tilhoerer_laereplan` on elements that mix in `BelongsToLaerePlan`.
  */
case class BelongsToObj(kode: String)
object BelongsToObj {
// Circe codecs derived from the single `kode` field.
implicit val encoder: Encoder[BelongsToObj] = deriveEncoder
implicit val decoder: Decoder[BelongsToObj] = deriveDecoder
}

/** Element type decoded from the `kompetansemaal-lk20` endpoint (see `getAllKompetansemaal`).
  * Mixes in `BelongsToLaerePlan`, so it carries a `tilhoerer_laereplan` reference.
  */
case class GrepKompetansemaal(kode: String, tittel: Seq[GrepTitle], tilhoerer_laereplan: BelongsToObj)
extends GrepElement
with BelongsToLaerePlan
object GrepKompetansemaal {
// Circe codecs derived from the case class fields.
// NOTE(review): JSON keys presumably follow the field names (hence snake_case `tilhoerer_laereplan`) — confirm.
implicit val encoder: Encoder[GrepKompetansemaal] = deriveEncoder
implicit val decoder: Decoder[GrepKompetansemaal] = deriveDecoder
}

/** Element type decoded from the `kompetansemaalsett-lk20` endpoint (see `getAllKompetansemaalSett`).
  * Mixes in `BelongsToLaerePlan`, so it carries a `tilhoerer_laereplan` reference.
  */
case class GrepKompetansemaalSett(kode: String, tittel: Seq[GrepTitle], tilhoerer_laereplan: BelongsToObj)
extends GrepElement
with BelongsToLaerePlan
object GrepKompetansemaalSett {
// Circe codecs derived from the case class fields.
// NOTE(review): JSON keys presumably follow the field names (hence snake_case `tilhoerer_laereplan`) — confirm.
implicit val encoder: Encoder[GrepKompetansemaalSett] = deriveEncoder
implicit val decoder: Decoder[GrepKompetansemaalSett] = deriveDecoder
}

/** Element type decoded from the `laereplaner-lk20` endpoint (see `getAllLaereplaner`).
  * It does not mix in `BelongsToLaerePlan`, so `asSearchableGrep` leaves its `laereplanCode` empty.
  */
case class GrepLaererplan(kode: String, tittel: Seq[GrepTitle]) extends GrepElement
object GrepLaererplan {
// Circe codecs derived from the case class fields.
implicit val encoder: Encoder[GrepLaererplan] = deriveEncoder
implicit val decoder: Decoder[GrepLaererplan] = deriveDecoder
}

object GrepElement {
implicit val encoder: Encoder[GrepElement] = deriveEncoder
implicit val decoder: Decoder[GrepElement] = deriveDecoder
/** Element type decoded from the `tverrfaglige-temaer-lk20` endpoint (see `getAllTverrfagligeTemaer`).
  * It does not mix in `BelongsToLaerePlan`, so `asSearchableGrep` leaves its `laereplanCode` empty.
  */
case class GrepTverrfagligTema(kode: String, tittel: Seq[GrepTitle]) extends GrepElement
object GrepTverrfagligTema {
// Circe codecs derived from the case class fields.
implicit val encoder: Encoder[GrepTverrfagligTema] = deriveEncoder
implicit val decoder: Decoder[GrepTverrfagligTema] = deriveDecoder
}
Original file line number Diff line number Diff line change
@@ -15,7 +15,8 @@ import no.ndla.search.model.SearchableLanguageValues
case class SearchableGrepElement(
code: String,
title: SearchableLanguageValues,
defaultTitle: Option[String]
defaultTitle: Option[String],
laereplanCode: Option[String]
)

object SearchableGrepElement {
Original file line number Diff line number Diff line change
@@ -35,10 +35,12 @@ trait GrepIndexService {

override def getMapping: MappingDefinition = {
val fields = List(
keywordField("code").normalizer("lower")
keywordField("defaultTitle"),
keywordField("code").normalizer("lower"),
keywordField("laereplanCode").normalizer("lower")
)
val dynamics = generateLanguageSupportedDynamicTemplates("title", keepRaw = true)

val dynamics = generateLanguageSupportedDynamicTemplates("title", keepRaw = true)
properties(fields).dynamicTemplates(dynamics)
}

Original file line number Diff line number Diff line change
@@ -32,7 +32,7 @@ import no.ndla.searchapi.model.search.{SearchType, SearchableGrepElement}
import scala.util.{Success, Try}

trait GrepSearchService {
this: Props & SearchService & GrepIndexService & BaseIndexService & Elastic4sClient =>
this: Props & SearchService & GrepIndexService & BaseIndexService & Elastic4sClient & SearchConverterService =>
val grepSearchService: GrepSearchService

class GrepSearchService extends SearchService {
@@ -49,34 +49,52 @@ trait GrepSearchService {
case Some(ByCodeDesc) => sortField("code", Desc, missingLast = false)
}

protected def buildQuery(input: GrepSearchInputDTO, searchLanguage: String): Query = {
val query = input.query
.map { q =>
val langQueryFunc = (fieldName: String, boost: Double) =>
buildSimpleStringQueryForField(
q,
fieldName,
boost,
searchLanguage,
fallback = true,
searchDecompounded = true
)

val codeQueries = boolQuery().should(
prefixQuery("code", q.underlying).boost(50),
matchQuery("code", q.underlying).boost(10),
termQuery("code", q.underlying).boost(100)
)
val titleQuery = langQueryFunc("title", 6)
/** Builds the code-related part of a grep search query.
  *
  * Every entry of `codePrefixes` and `codes` becomes a prefix query, and every entry of `codes`
  * additionally becomes a match query and a term query. Each query is issued against both the
  * `code` and the `laereplanCode` field with a boost of 50.
  *
  * @param codePrefixes strings to search for as prefixes only
  * @param codes        strings to search for as prefixes and as match/term queries
  * @return a bool query with all generated queries as `should` clauses, or `None` when nothing was generated
  */
protected def buildCodeQueries(codePrefixes: Set[String], codes: Set[String]): Option[Query] = {
  val codeFields = List("code", "laereplanCode")

  // List-based, so the prefix queries keep the iteration order of the combined set.
  val byPrefix = for {
    prefix <- (codePrefixes ++ codes).toList
    field  <- codeFields
  } yield prefixQuery(field, prefix).boost(50)

  // Driven by the `codes` set, so the result stays a Set exactly like a `codes.flatMap` would.
  val byCode = for {
    code  <- codes
    field <- codeFields
    query <- List(matchQuery(field, code).boost(50), termQuery(field, code).boost(50))
  } yield query

  val allQueries = byPrefix ++ byCode
  if (allQueries.isEmpty) None
  else Some(boolQuery().should(allQueries))
}

val onlyCodeQuery = boolQuery()
.must(codeQueries)
.not(titleQuery)
/** Collects every code-shaped token found in `query`.
  *
  * A token qualifies when it stands between word boundaries and consists of two or three ASCII
  * letters, one to four digits, and optionally a dash followed by one to four more digits.
  *
  * @param query free-text search string
  * @return the distinct matching tokens; empty when nothing matches
  */
def extractCodesFromQuery(query: String): Set[String] = {
  val codePattern = """\b([A-Za-z]{2,3}\d{1,4}(?:-\d{1,4})?)\b""".r
  codePattern.findAllMatchIn(query).map(_.matched).toSet
}

/** Collects every token in `query` that could be the beginning of a code.
  *
  * A token qualifies when it stands between word boundaries and consists of two or three ASCII
  * letters, optionally followed by one to four digits, optionally followed by a dash and one to
  * four more digits. Plain two- or three-letter words therefore match as well.
  *
  * @param query free-text search string
  * @return the distinct matching tokens; empty when nothing matches
  */
def extractCodePrefixesFromQuery(query: String): Set[String] = {
  val prefixPattern = """\b([A-Za-z]{2,3}(\d{1,4})?(?:-\d{1,4})?)\b""".r
  prefixPattern.findAllMatchIn(query).map(_.matched).toSet
}

protected def buildQuery(input: GrepSearchInputDTO, searchLanguage: String): Query = {
val query = input.query match {
case Some(q) =>
val codes = extractCodesFromQuery(q.underlying)
val codePrefixes = extractCodePrefixesFromQuery(q.underlying)
val codeQueries = buildCodeQueries(codePrefixes, codes)
val titleQuery = languageQuery(q, "title", 6, searchLanguage)

boolQuery()
.must(boolQuery().should(titleQuery, onlyCodeQuery))
}
.getOrElse(boolQuery())
.withShould(titleQuery)
.withShould(codeQueries)
.minimumShouldMatch(1)
case None => boolQuery()
}
query.filter(getFilters(input))
}

@@ -109,9 +127,8 @@ trait GrepSearchService {
val searchPage = input.page.getOrElse(1)
val searchPageSize = input.pageSize.getOrElse(10)
val pagination = getStartAtAndNumResults(page = searchPage, pageSize = searchPageSize).?

val sort = grepSortDefinition(input.sort, searchLanguage)
val filteredQuery = buildQuery(input, searchLanguage)
val sort = grepSortDefinition(input.sort, searchLanguage)
val filteredQuery = buildQuery(input, searchLanguage)

val searchToExecute = search(searchIndex)
.query(filteredQuery)
@@ -133,7 +150,7 @@ trait GrepSearchService {
}
}

def hitToResult(hit: SearchHit, language: String): Try[GrepResultDTO] = {
private def hitToResult(hit: SearchHit, language: String): Try[GrepResultDTO] = {
val jsonString = hit.sourceAsString
val searchable = CirceUtil.tryParseAs[SearchableGrepElement](jsonString).?
val titleLv = findByLanguageOrBestEffort(searchable.title.languageValues, language)
@@ -143,12 +160,13 @@ trait GrepSearchService {
Success(
GrepResultDTO(
code = searchable.code,
title = title
title = title,
laereplanCode = searchable.laereplanCode
)
)
}

def getGrepHits(response: RequestSuccess[SearchResponse], language: String): Try[List[GrepResultDTO]] = {
private def getGrepHits(response: RequestSuccess[SearchResponse], language: String): Try[List[GrepResultDTO]] = {
response.result.hits.hits.toList.traverse { hit => hitToResult(hit, language) }
}
}
Original file line number Diff line number Diff line change
@@ -255,9 +255,8 @@ trait SearchConverterService {

}

def asSearchableGrep(grepElement: GrepElement): Try[SearchableGrepElement] = {
val defaultTitle = grepElement.tittel.find(_.spraak == "default")
val titles = grepElement.tittel.flatMap(gt => {
def convertGrepTitleToLanguageValue(grepElement: GrepElement): Seq[LanguageValue[String]] =
grepElement.tittel.flatMap(gt => {
ISO639.get6391CodeFor6392Code(gt.spraak) match {
case Some(convertedLanguage) =>
Some(LanguageValue(language = convertedLanguage, value = gt.verdi.trim))
@@ -268,13 +267,21 @@ trait SearchConverterService {
}
})

val title = SearchableLanguageValues.fromFields(titles.distinctBy(_.language))
def asSearchableGrep(grepElement: GrepElement): Try[SearchableGrepElement] = {
val laererplan = grepElement match {
case lp: BelongsToLaerePlan => Some(lp.tilhoerer_laereplan.kode)
case _ => None
}
val defaultTitle = grepElement.tittel.find(_.spraak == "default")
val titles = convertGrepTitleToLanguageValue(grepElement)
val title = SearchableLanguageValues.fromFields(titles)

Success(
SearchableGrepElement(
code = grepElement.kode,
title = title,
defaultTitle = defaultTitle.map(_.verdi)
defaultTitle = defaultTitle.map(_.verdi),
laereplanCode = laererplan
)
)
}
Original file line number Diff line number Diff line change
@@ -73,6 +73,9 @@ trait SearchService {
}
}

/** Shorthand for `buildSimpleStringQueryForField` with `fallback` and `searchDecompounded` both switched on.
  *
  * @param query    the search text
  * @param field    name of the field to search
  * @param boost    boost passed through to the underlying query builder
  * @param language language passed through to the underlying query builder
  */
def languageQuery(query: NonEmptyString, field: String, boost: Double, language: String): SimpleStringQuery = {
  buildSimpleStringQueryForField(
    query,
    field,
    boost,
    language,
    fallback = true,
    searchDecompounded = true
  )
}

def buildSimpleStringQueryForField(
query: NonEmptyString,
field: String,
25 changes: 18 additions & 7 deletions search-api/src/test/scala/no/ndla/searchapi/TestData.scala
Original file line number Diff line number Diff line change
@@ -50,7 +50,15 @@ import no.ndla.language.Language.DefaultLanguage
import no.ndla.search.model.domain.EmbedValues
import no.ndla.search.model.{LanguageValue, SearchableLanguageList, SearchableLanguageValues}
import no.ndla.searchapi.model.domain.*
import no.ndla.searchapi.model.grep.{GrepBundle, GrepElement, GrepTitle}
import no.ndla.searchapi.model.grep.{
BelongsToObj,
GrepBundle,
GrepKjerneelement,
GrepKompetansemaal,
GrepLaererplan,
GrepTitle,
GrepTverrfagligTema
}
import no.ndla.searchapi.model.search.*
import no.ndla.searchapi.model.search.settings.{MultiDraftSearchSettings, SearchSettings}
import no.ndla.searchapi.model.taxonomy.*
@@ -1573,21 +1581,24 @@ object TestData {
kjerneelementer = List.empty,
kompetansemaal = List.empty,
kompetansemaalsett = List.empty,
tverrfagligeTemaer = List.empty
tverrfagligeTemaer = List.empty,
laereplaner = List.empty
)

val grepBundle: GrepBundle = emptyGrepBundle.copy(
kjerneelementer = List(
GrepElement("KE12", Seq(GrepTitle("default", "Utforsking og problemløysing"))),
GrepElement("KE34", Seq(GrepTitle("default", "Abstraksjon og generalisering")))
GrepKjerneelement("KE12", Seq(GrepTitle("default", "Utforsking og problemløysing")), BelongsToObj("LP1")),
GrepKjerneelement("KE34", Seq(GrepTitle("default", "Abstraksjon og generalisering")), BelongsToObj("LP1"))
),
kompetansemaal = List(
GrepElement(
GrepKompetansemaal(
"KM123",
Seq(GrepTitle("default", "bruke ulike kilder på en kritisk, hensiktsmessig og etterrettelig måte"))
Seq(GrepTitle("default", "bruke ulike kilder på en kritisk, hensiktsmessig og etterrettelig måte")),
BelongsToObj("LP1")
)
),
tverrfagligeTemaer = List(GrepElement("TT2", Seq(GrepTitle("default", "Demokrati og medborgerskap"))))
tverrfagligeTemaer = List(GrepTverrfagligTema("TT2", Seq(GrepTitle("default", "Demokrati og medborgerskap")))),
laereplaner = List(GrepLaererplan("LP1", Seq(GrepTitle("default", "Læreplan i norsk (NOR01-04)"))))
)

val searchSettings: SearchSettings = SearchSettings(
Loading

0 comments on commit ddffdf4

Please sign in to comment.