Skip to content

Commit

Permalink
Merge pull request #260 from concord-consortium/187885620-split-gloss…
Browse files Browse the repository at this point in the history
…ary-data-into-two-columns

feat: Split glossary data column into three columns [PT-187885620]
  • Loading branch information
dougmartin authored Jul 17, 2024
2 parents 3f8758c + 1ed5c1c commit 93e351b
Show file tree
Hide file tree
Showing 13 changed files with 176 additions and 28 deletions.
2 changes: 2 additions & 0 deletions query-creator/create-query/steps/aws.js
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,8 @@ exports.generateSQL = (runnableInfo, usageReport, authDomain, sourceKey, hideNam
resourceColumns = resourceColumns.concat([
{name: `res_${resIndex}_name`,
value: `'${escapeSingleQuote(name)}'`},
{name: `res_${resIndex}_offering_id`,
value: `learners_and_answers_${resIndex}.offering_id`},
{name: `res_${resIndex}_learner_id`,
value: `learners_and_answers_${resIndex}.learner_id`},
{name: `res_${resIndex}_remote_endpoint`,
Expand Down
16 changes: 16 additions & 0 deletions query-creator/create-query/tests/unit/generated-sql.js
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ one_row_table_for_join as (SELECT null AS empty)
null AS teacher_states,
null AS teacher_emails,
null AS res_1_name,
null AS res_1_offering_id,
null AS res_1_learner_id,
null AS res_1_remote_endpoint,
null AS res_1_resource_url,
Expand All @@ -88,6 +89,7 @@ one_row_table_for_join as (SELECT null AS empty)
null AS res_1_num_required_questions,
null AS res_1_num_required_answers,
null AS res_2_name,
null AS res_2_offering_id,
null AS res_2_learner_id,
null AS res_2_remote_endpoint,
null AS res_2_resource_url,
Expand Down Expand Up @@ -146,6 +148,7 @@ UNION ALL
null AS teacher_states,
null AS teacher_emails,
null AS res_1_name,
null AS res_1_offering_id,
null AS res_1_learner_id,
null AS res_1_remote_endpoint,
null AS res_1_resource_url,
Expand All @@ -156,6 +159,7 @@ UNION ALL
null AS res_1_num_required_questions,
null AS res_1_num_required_answers,
null AS res_2_name,
null AS res_2_offering_id,
null AS res_2_learner_id,
null AS res_2_remote_endpoint,
null AS res_2_resource_url,
Expand Down Expand Up @@ -214,6 +218,7 @@ UNION ALL
unique_user_class.teacher_states,
unique_user_class.teacher_emails,
'test activity' AS res_1_name,
learners_and_answers_1.offering_id AS res_1_offering_id,
learners_and_answers_1.learner_id AS res_1_learner_id,
learners_and_answers_1.remote_endpoint AS res_1_remote_endpoint,
learners_and_answers_1.resource_url AS res_1_resource_url,
Expand All @@ -224,6 +229,7 @@ round(100.0 * learners_and_answers_1.num_answers / activities_1.num_questions, 1
learners_and_answers_1.num_required_questions AS res_1_num_required_questions,
learners_and_answers_1.num_required_answers AS res_1_num_required_answers,
'test activity 2' AS res_2_name,
learners_and_answers_2.offering_id AS res_2_offering_id,
learners_and_answers_2.learner_id AS res_2_learner_id,
learners_and_answers_2.remote_endpoint AS res_2_remote_endpoint,
learners_and_answers_2.resource_url AS res_2_resource_url,
Expand Down Expand Up @@ -351,6 +357,7 @@ one_row_table_for_join as (SELECT null AS empty)
null AS teacher_states,
null AS teacher_emails,
null AS res_1_name,
null AS res_1_offering_id,
null AS res_1_learner_id,
null AS res_1_remote_endpoint,
null AS res_1_resource_url,
Expand All @@ -361,6 +368,7 @@ one_row_table_for_join as (SELECT null AS empty)
null AS res_1_num_required_questions,
null AS res_1_num_required_answers,
null AS res_2_name,
null AS res_2_offering_id,
null AS res_2_learner_id,
null AS res_2_remote_endpoint,
null AS res_2_resource_url,
Expand Down Expand Up @@ -419,6 +427,7 @@ UNION ALL
null AS teacher_states,
null AS teacher_emails,
null AS res_1_name,
null AS res_1_offering_id,
null AS res_1_learner_id,
null AS res_1_remote_endpoint,
null AS res_1_resource_url,
Expand All @@ -429,6 +438,7 @@ UNION ALL
null AS res_1_num_required_questions,
null AS res_1_num_required_answers,
null AS res_2_name,
null AS res_2_offering_id,
null AS res_2_learner_id,
null AS res_2_remote_endpoint,
null AS res_2_resource_url,
Expand Down Expand Up @@ -487,6 +497,7 @@ UNION ALL
unique_user_class.teacher_states,
unique_user_class.teacher_emails,
'test activity' AS res_1_name,
learners_and_answers_1.offering_id AS res_1_offering_id,
learners_and_answers_1.learner_id AS res_1_learner_id,
learners_and_answers_1.remote_endpoint AS res_1_remote_endpoint,
learners_and_answers_1.resource_url AS res_1_resource_url,
Expand All @@ -497,6 +508,7 @@ round(100.0 * learners_and_answers_1.num_answers / activities_1.num_questions, 1
learners_and_answers_1.num_required_questions AS res_1_num_required_questions,
learners_and_answers_1.num_required_answers AS res_1_num_required_answers,
'test activity 2' AS res_2_name,
learners_and_answers_2.offering_id AS res_2_offering_id,
learners_and_answers_2.learner_id AS res_2_learner_id,
learners_and_answers_2.remote_endpoint AS res_2_remote_endpoint,
learners_and_answers_2.resource_url AS res_2_resource_url,
Expand Down Expand Up @@ -623,6 +635,7 @@ SELECT
unique_user_class.teacher_states,
unique_user_class.teacher_emails,
'test activity' AS res_1_name,
learners_and_answers_1.offering_id AS res_1_offering_id,
learners_and_answers_1.learner_id AS res_1_learner_id,
learners_and_answers_1.remote_endpoint AS res_1_remote_endpoint,
learners_and_answers_1.resource_url AS res_1_resource_url,
Expand All @@ -633,6 +646,7 @@ SELECT
learners_and_answers_1.num_required_questions AS res_1_num_required_questions,
learners_and_answers_1.num_required_answers AS res_1_num_required_answers,
'test activity 2' AS res_2_name,
learners_and_answers_2.offering_id AS res_2_offering_id,
learners_and_answers_2.learner_id AS res_2_learner_id,
learners_and_answers_2.remote_endpoint AS res_2_remote_endpoint,
learners_and_answers_2.resource_url AS res_2_resource_url,
Expand Down Expand Up @@ -731,6 +745,7 @@ one_row_table_for_join as (SELECT null AS empty)
unique_user_class.teacher_states,
unique_user_class.teacher_emails,
'test activity' AS res_1_name,
learners_and_answers_1.offering_id AS res_1_offering_id,
learners_and_answers_1.learner_id AS res_1_learner_id,
learners_and_answers_1.remote_endpoint AS res_1_remote_endpoint,
learners_and_answers_1.resource_url AS res_1_resource_url,
Expand All @@ -741,6 +756,7 @@ round(100.0 * learners_and_answers_1.num_answers / activities_1.num_questions, 1
learners_and_answers_1.num_required_questions AS res_1_num_required_questions,
learners_and_answers_1.num_required_answers AS res_1_num_required_answers,
'test activity 2' AS res_2_name,
learners_and_answers_2.offering_id AS res_2_offering_id,
learners_and_answers_2.learner_id AS res_2_learner_id,
learners_and_answers_2.remote_endpoint AS res_2_remote_endpoint,
learners_and_answers_2.resource_url AS res_2_resource_url,
Expand Down
3 changes: 3 additions & 0 deletions server/config/runtime.exs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ config :report_server, :report_service,
url: System.get_env("REPORT_SERVICE_URL") || "https://us-central1-report-service-pro.cloudfunctions.net/api", # production
token: report_service_token

# Base URL of the portal-report app. Taken from the PORTAL_REPORT_URL
# environment variable when set, otherwise falls back to the default below.
config :report_server, :portal_report,
url: System.get_env("PORTAL_REPORT_URL") || "https://portal-report.concord.org/branch/master/" # production (yes, prod uses master)

if config_env() == :prod do
# The secret key base is used to sign/encrypt cookies and other secrets.
# A default value is used in config/dev.exs and config/test.exs but you
Expand Down
1 change: 1 addition & 0 deletions server/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ services:
- REPORT_SERVICE_URL=${REPORT_SERVICE_URL:-}
- REPORT_SERVICE_TOKEN=${REPORT_SERVICE_TOKEN:-}
- PORTAL_URL=${PORTAL_URL:-https://learn.portal.staging.concord.org}
# Default must use the ":-" form like the sibling entries; "${VAR:default}" is not valid Compose interpolation.
- PORTAL_REPORT_URL=${PORTAL_REPORT_URL:-https://portal-report.concord.org/branch/master/}
- TOKEN_SERVICE_URL=${TOKEN_SERVICE_URL:-https://token-service-staging.firebaseapp.com/api/v1/resources}
- TOKEN_SERVICE_PRIVATE_BUCKET=${TOKEN_SERVICE_PRIVATE_BUCKET:-token-service-files-private}
- OUTPUT_BUCKET=${OUTPUT_BUCKET:-report-server-output}
Expand Down
5 changes: 3 additions & 2 deletions server/lib/report_server/post_processing/job.ex
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ defmodule ReportServer.PostProcessing.Job do
alias ReportServer.PostProcessing.Steps.Helpers

@derive {Jason.Encoder, only: [:id, :steps, :status, :result]}
defstruct id: nil, query_id: nil, steps: [], status: :queued, ref: nil, result: nil, rows_processed: 0, started_at: 0
defstruct id: nil, query_id: nil, steps: [], status: :queued, ref: nil, result: nil, rows_processed: 0, started_at: 0, portal_url: nil

def run(mode, job, query_result, job_server_pid) do
case Aws.get_file_stream(mode, query_result.output_location) do
Expand All @@ -25,7 +25,8 @@ defmodule ReportServer.PostProcessing.Job do
input_header_map: header_map,
output_header: row,
output_header_map: header_map,
rows_processed: 0
rows_processed: 0,
portal_url: job.portal_url
}
params = Enum.reduce(job.steps, params, fn step, acc -> step.init.(acc) end)
{[params.output_header], increment_rows_processed(params, job_server_pid, job.id)}
Expand Down
2 changes: 1 addition & 1 deletion server/lib/report_server/post_processing/job_params.ex
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
defmodule ReportServer.PostProcessing.JobParams do
defstruct mode: nil, input_header: [], input_header_map: %{}, output_header: [], output_header_map: %{}, step_state: %{}, rows_processed: 0
defstruct mode: nil, input_header: [], input_header_map: %{}, output_header: [], output_header_map: %{}, step_state: %{}, rows_processed: 0, portal_url: nil
end
8 changes: 4 additions & 4 deletions server/lib/report_server/post_processing/job_server.ex
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ defmodule ReportServer.PostProcessing.JobServer do
GenServer.cast(get_server_pid(query_id), :request_job_status)
end

def add_job(query_id, query_result, steps) do
GenServer.cast(get_server_pid(query_id), {:add_job, query_result, steps})
def add_job(query_id, query_result, steps, portal_url) do
GenServer.cast(get_server_pid(query_id), {:add_job, query_result, steps, portal_url})
end

def query_topic(query_id), do: "job_server_#{query_id}"
Expand Down Expand Up @@ -71,8 +71,8 @@ defmodule ReportServer.PostProcessing.JobServer do
{:noreply, state}
end

def handle_cast({:add_job, query_result, steps}, state = %{mode: mode, jobs: jobs}) do
job = %Job{id: length(jobs) + 1, query_id: query_result.id, steps: steps, status: :started, started_at: :os.system_time(:millisecond), ref: nil, result: nil}
def handle_cast({:add_job, query_result, steps, portal_url}, state = %{mode: mode, jobs: jobs}) do
job = %Job{id: length(jobs) + 1, query_id: query_result.id, steps: steps, status: :started, started_at: :os.system_time(:millisecond), portal_url: portal_url, ref: nil, result: nil}
step_labels = Enum.map(steps, &(&1.label)) |> Enum.join(", ")
Logger.info("Adding job ##{job.id} for query #{query_result.id} (#{step_labels})")

Expand Down
120 changes: 103 additions & 17 deletions server/lib/report_server/post_processing/steps/glossary_data.ex
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,21 @@ defmodule ReportServer.PostProcessing.Steps.GlossaryData do
alias ReportServer.PostProcessing.JobParams
alias ReportServer.PostProcessing.Step
alias ReportServer.PostProcessing.Steps.Helpers
alias ReportServerWeb.ReportService
alias ReportServerWeb.{ReportService, PortalReport}

@id "glossary_data"
@learner_id "learner_id"
@remote_endpoint "remote_endpoint"
@resource_url "resource_url"
@offering_id "offering_id"

@staging_firebase_app "report-service-dev"
@prod_firebase_app "report-service-pro"

def step do
%Step{
id: @id,
label: "Add glossary data column",
label: "Add glossary definition and audio link columns",
init: &init/1,
process_row: &process_row/3
}
Expand All @@ -23,10 +28,13 @@ defmodule ReportServer.PostProcessing.Steps.GlossaryData do
# get all the resource columns
all_resource_cols = Helpers.get_resource_cols(params)

# add a column after each resource url column
# add three glossary columns after each learner id column
params = Enum.reduce(all_resource_cols, params, fn {resource, resource_cols}, acc ->
{remote_endpoint_col, _index} = resource_cols[@remote_endpoint]
Helpers.add_output_column(acc, glossary_data_col(resource), :after, remote_endpoint_col)
{learner_id_col, _index} = resource_cols[@learner_id]
acc
|> Helpers.add_output_column(glossary_audio_link_col(resource), :after, learner_id_col)
|> Helpers.add_output_column(glossary_audio_definitions_col(resource), :after, learner_id_col)
|> Helpers.add_output_column(glossary_text_definitions_col(resource), :after, learner_id_col)
end)

step_state = Map.put(step_state, @id, all_resource_cols)
Expand All @@ -38,39 +46,73 @@ defmodule ReportServer.PostProcessing.Steps.GlossaryData do
# copy headers to new columns
all_resource_cols = Map.get(step_state, @id)
Enum.reduce(all_resource_cols, row, fn {resource, resource_cols}, {input, output} ->
{_remote_endpoint_col, index} = resource_cols[@remote_endpoint]
{input, Map.put(output, glossary_data_col(resource), input[index])}
{_learner_id_col, index} = resource_cols[@learner_id]
output = output
|> Map.put(glossary_audio_link_col(resource), input[index])
|> Map.put(glossary_audio_definitions_col(resource), input[index])
|> Map.put(glossary_text_definitions_col(resource), input[index])
{input, output}
end)
end

# data rows
def process_row(%JobParams{mode: mode, step_state: step_state}, row, _data_row? = true) do
def process_row(params = %JobParams{mode: mode, step_state: step_state, portal_url: portal_url}, row, _data_row? = true) do
all_resource_cols = Map.get(step_state, @id)

Enum.reduce(all_resource_cols, row, fn {resource, resource_cols}, {input, output} ->
{_remote_endpoint_col, remote_endpoint_col_index} = resource_cols[@remote_endpoint]
{_resource_url_col, resource_url_col_index} = resource_cols[@resource_url]
glossary_data = case get_glossary_data(mode, input[remote_endpoint_col_index], input[resource_url_col_index]) do
{:ok, {key, plugin_state}} -> Jason.encode!(%{"key" => key, "plugin_state" => plugin_state})
_ -> ""
remote_endpoint = get_resource_col(input, resource_cols, @remote_endpoint)
resource_url = get_resource_col(input, resource_cols, @resource_url)
offering_id = get_resource_col(input, resource_cols, @offering_id)
student_id = Helpers.get_input_value(params, input, "student_id")
class_id = Helpers.get_input_value(params, input, "class_id")

portal_uri = URI.parse(remote_endpoint)
firebase_app = case portal_uri.host do
"learn.concord.org" -> @prod_firebase_app
"ngss-assessment.portal.concord.org" -> @prod_firebase_app
_ -> @staging_firebase_app
end

output = case get_glossary_data(mode, remote_endpoint, resource_url) do
{:ok, source, key, word_definitions, audio_definitions} ->
audio_link_opts = [
auth_domain: portal_url, # authenticate with the portal the report server is authenticated to
firebase_app: firebase_app,
source: source,
portal_url: "#{portal_uri.scheme}://#{portal_uri.host}", # portal url of resource
class_id: class_id,
offering_id: offering_id,
student_id: student_id,
key: key
]
output
|> Map.put(glossary_audio_link_col(resource), get_audio_link(audio_definitions, audio_link_opts))
|> Map.put(glossary_audio_definitions_col(resource), get_audio_definitions(audio_definitions))
|> Map.put(glossary_text_definitions_col(resource), get_word_definitions(word_definitions))
_ ->
output
end
{input, Map.put(output, glossary_data_col(resource), glossary_data)}

{input, output}
end)
end

defp get_glossary_data(mode, remote_endpoint, resource_url) do
source = URI.parse(resource_url).host
with {:ok, plugin_states} <- ReportService.get_plugin_states(mode, source, remote_endpoint),
{:ok, glossary_plugin_key_and_state} <- get_first_glossary_plugin_key_and_state(plugin_states) do
{:ok, glossary_plugin_key_and_state}
{:ok, {key, plugin_state}} <- get_first_glossary_plugin_key_and_state(plugin_states),
{:ok, {word_definitions, audio_definitions}} <- parse_plugin_state(plugin_state) do
{:ok, source, key, word_definitions, audio_definitions}
else
{:error, error} ->
Logger.error(error)
{:error, error}
end
end

defp glossary_data_col(resource), do: "#{resource}_glossary_data"
defp glossary_text_definitions_col(resource), do: "#{resource}_glossary_text_definitions"
defp glossary_audio_definitions_col(resource), do: "#{resource}_glossary_audio_definitions"
defp glossary_audio_link_col(resource), do: "#{resource}_glossary_audio_link"

defp get_first_glossary_plugin_key_and_state(plugin_states) do
first_plugin_state = Enum.find(plugin_states, fn {_k, v} ->
Expand All @@ -82,4 +124,48 @@ defmodule ReportServer.PostProcessing.Steps.GlossaryData do
{:error, "No glossary plugin state found"}
end
end

# Splits a glossary plugin state into its text and audio parts.
#
# Returns `{:ok, {word_definitions, audio_words}}` where:
#   * `word_definitions` is a list of `{word, text_definitions}` pairs for every
#     word that has at least one non-audio definition (definitions are reversed
#     relative to their stored order, as before)
#   * `audio_words` is the sorted list of words that have at least one
#     definition starting with "recordingData://"
#
# Any state without a "definitions" map yields empty results. The result is
# always a 2-tuple whose second element is a pair, because the caller matches
# `{:ok, {word_definitions, audio_definitions}}` in a `with`; a differently
# shaped tuple would fall through to its `else` and raise.
defp parse_plugin_state(%{"definitions" => all_definitions}) when is_map(all_definitions) do
  word_definitions =
    all_definitions
    |> Enum.map(fn {word, definitions} ->
      {word, definitions |> Enum.reject(&glossary_audio_definition?/1) |> Enum.reverse()}
    end)
    |> Enum.reject(fn {_word, text_definitions} -> text_definitions == [] end)

  # sort once at the end instead of on every iteration
  audio_words =
    all_definitions
    |> Enum.filter(fn {_word, definitions} ->
      Enum.any?(definitions, &glossary_audio_definition?/1)
    end)
    |> Enum.map(fn {word, _definitions} -> word end)
    |> Enum.sort()

  {:ok, {word_definitions, audio_words}}
end

defp parse_plugin_state(_) do
  {:ok, {%{}, []}}
end

# True when a stored definition is a reference to recorded audio rather than text.
defp glossary_audio_definition?(definition) do
  String.starts_with?(definition, "recordingData://")
end

# Builds the audio-report link, or "" when the learner recorded no audio definitions.
defp get_audio_link(audio_definitions, opts) do
  case audio_definitions do
    [] -> ""
    _ -> PortalReport.glossary_audio_link(opts)
  end
end

# Renders the text definitions as one line per word, in the form
#   word: "definition 1"; "definition 2"
# An empty map renders as "".
defp get_word_definitions(word_definitions) when map_size(word_definitions) == 0, do: ""
defp get_word_definitions(word_definitions) do
  Enum.map_join(word_definitions, "\n", fn {word, definitions} ->
    quoted = Enum.map_join(definitions, "; ", fn definition -> "\"#{definition}\"" end)
    "#{word}: #{quoted}"
  end)
end

# Renders the words that have audio definitions as a single space-separated string.
defp get_audio_definitions(audio_definitions) do
  Enum.join(audio_definitions, " ")
end

# Looks up the {column, index} entry stored under `column_name` in `resource_cols`
# and returns the value of the input row at that index.
defp get_resource_col(input, resource_cols, column_name) do
  {_column, column_index} = Access.get(resource_cols, column_name)
  Access.get(input, column_index)
end

end
8 changes: 8 additions & 0 deletions server/lib/report_server/post_processing/steps/helpers.ex
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,14 @@ defmodule ReportServer.PostProcessing.Steps.Helpers do

@col_regex ~r/(?<res>res_\d+)_(?<question_id>.+)_/

@doc """
Returns the value of the input row for the column named `column_name`.

The column's index is looked up in the job's `input_header_map`; `nil` is
returned when the header map has no such column.
"""
def get_input_value(%JobParams{input_header_map: input_header_map}, input, column_name) do
  case Map.fetch(input_header_map, column_name) do
    {:ok, column_index} -> input[column_index]
    :error -> nil
  end
end

def get_header_map(list) do
Enum.with_index(list) |> Map.new()
end
Expand Down
Loading

0 comments on commit 93e351b

Please sign in to comment.