Skip to content

Commit

Permalink
Nouveaux JDD à référencer : ignore BDTOPO (#3364)
Browse files Browse the repository at this point in the history
* Nouveaux JDD à référencer : ignore BDTOPO

* Add test
  • Loading branch information
AntoineAugusti authored Aug 1, 2023
1 parent 410143a commit 90344dd
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 14 deletions.
42 changes: 29 additions & 13 deletions apps/transport/lib/jobs/new_datagouv_datasets_job.ex
Original file line number Diff line number Diff line change
Expand Up @@ -88,19 +88,35 @@ defmodule Transport.Jobs.NewDatagouvDatasetsJob do
DateTime.compare(datetime, dt_limit) == :gt
end

@doc """
Tells whether a dataset should be skipped regardless of its content.

When the dataset's organization id is `"5a83f81fc751df6f8573eb8a"`, the result
is whether its title contains `"BDTOPO"`. Any other map yields `false`.

    iex> ignore_dataset?(%{"organization" => %{"id" => "5a83f81fc751df6f8573eb8a"}, "title" => "BDTOPO© - Chefs-Lieux pour le département de l'Eure-et-Loir"})
    true

    iex> ignore_dataset?(%{"title" => "BDTOPO"})
    false
"""
def ignore_dataset?(%{} = dataset) do
  case dataset do
    %{"organization" => %{"id" => "5a83f81fc751df6f8573eb8a"}, "title" => dataset_title} ->
      String.contains?(dataset_title, "BDTOPO")

    _other ->
      false
  end
end

# Decides whether a dataset map is relevant.
#
# The final value comes from the `if` below: `false` when `ignore_dataset?/1`
# flags the dataset, otherwise `true` when the dataset itself (tags,
# description or title) or at least one of its resources is relevant.
# `Map.fetch!/2` raises if the "resources" key is missing.
def dataset_is_relevant?(%{} = dataset) do
# NOTE(review): the statements from here down to the first
# `match_on_dataset or match_on_resources` look like the removed side of the
# diff rendered on this page (no +/- markers are visible) — confirm. As
# written, their result is discarded because the `if` below is the last
# expression of the function.
match_on_dataset =
[&tags_is_relevant?/1, &description_is_relevant?/1, &title_is_relevant?/1]
|> Enum.map(& &1.(dataset))
|> Enum.any?()

match_on_resources =
dataset
|> Map.fetch!("resources")
|> Enum.map(&resource_is_relevant?/1)
|> Enum.any?()

match_on_dataset or match_on_resources
# Explicitly ignored datasets are never relevant, whatever their content.
if ignore_dataset?(dataset) do
false
else
# Dataset-level check: any of the tags/description/title predicates matches.
match_on_dataset =
[&tags_is_relevant?/1, &description_is_relevant?/1, &title_is_relevant?/1]
|> Enum.map(& &1.(dataset))
|> Enum.any?()

# Resource-level check: at least one entry under "resources" is relevant.
match_on_resources =
dataset
|> Map.fetch!("resources")
|> Enum.map(&resource_is_relevant?/1)
|> Enum.any?()

match_on_dataset or match_on_resources
end
end

# Runs the dataset's "title" value through `string_matches?/1`.
defp title_is_relevant?(%{"title" => dataset_title}) do
  string_matches?(dataset_title)
end
Expand Down Expand Up @@ -133,7 +149,7 @@ defmodule Transport.Jobs.NewDatagouvDatasetsJob do
MapSet.member?(@relevant_formats, String.downcase(format))
end

defp resource_schema_is_relevant?(%{"schema" => %{"name" => "etalab/schema-irve"}}), do: false
defp resource_schema_is_relevant?(%{"schema" => %{"name" => "etalab/schema-irve-statique"}}), do: false

defp resource_schema_is_relevant?(%{"schema" => %{"name" => schema_name}}) do
schema_name in Map.keys(Schemas.transport_schemas())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,23 @@ defmodule Transport.Test.Transport.Jobs.NewDatagouvDatasetsJobTest do

refute NewDatagouvDatasetsJob.dataset_is_relevant?(%{
base
| "resources" => [%{"format" => "csv", "schema" => %{"name" => "etalab/schema-irve"}, "description" => ""}]
| "resources" => [
%{"format" => "csv", "schema" => %{"name" => "etalab/schema-irve-statique"}, "description" => ""}
]
})

# Uses `ignore_dataset?/1` to ignore specific datasets
bdtopo_args =
Map.merge(base, %{
"title" => "BDTOPO© - Chefs-Lieux pour le département de l'Eure-et-Loir",
"tags" => ["transport"]
})

assert NewDatagouvDatasetsJob.dataset_is_relevant?(bdtopo_args)

refute NewDatagouvDatasetsJob.dataset_is_relevant?(
Map.merge(bdtopo_args, %{"organization" => %{"id" => "5a83f81fc751df6f8573eb8a"}})
)
end

test "filtered_datasets" do
Expand Down

0 comments on commit 90344dd

Please sign in to comment.