Average Scroll Depth Metric: imported data #4915

Merged · 16 commits · Jan 7, 2025
88 changes: 75 additions & 13 deletions lib/plausible/exports.ex
@@ -8,13 +8,15 @@ defmodule Plausible.Exports do
   import Ecto.Query
 
   @doc "Schedules CSV export job to S3 storage"
-  @spec schedule_s3_export(pos_integer, String.t()) :: {:ok, Oban.Job.t()} | {:error, :no_data}
-  def schedule_s3_export(site_id, email_to) do
+  @spec schedule_s3_export(pos_integer, pos_integer | nil, String.t()) ::
+          {:ok, Oban.Job.t()} | {:error, :no_data}
+  def schedule_s3_export(site_id, current_user_id, email_to) do
     with :ok <- ensure_has_data(site_id) do
       args = %{
         "storage" => "s3",
         "site_id" => site_id,
         "email_to" => email_to,
+        "current_user_id" => current_user_id,
         "s3_bucket" => Plausible.S3.exports_bucket(),
         "s3_path" => s3_export_key(site_id)
       }
@@ -207,13 +209,13 @@ defmodule Plausible.Exports do
   Builds Ecto queries to export data from `events_v2` and `sessions_v2`
   tables into the format of `imported_*` tables for a website.
   """
-  @spec export_queries(pos_integer,
+  @spec export_queries(pos_integer, pos_integer | nil,
           extname: String.t(),
           date_range: Date.Range.t(),
           timezone: String.t()
         ) ::
           %{String.t() => Ecto.Query.t()}
-  def export_queries(site_id, opts \\ []) do
+  def export_queries(site_id, current_user_id, opts \\ []) do
     extname = opts[:extname] || ".csv"
     date_range = opts[:date_range]
     timezone = opts[:timezone] || "UTC"
@@ -232,7 +234,8 @@ defmodule Plausible.Exports do
     %{
       filename.("imported_visitors") => export_visitors_q(site_id, timezone, date_range),
       filename.("imported_sources") => export_sources_q(site_id, timezone, date_range),
-      filename.("imported_pages") => export_pages_q(site_id, timezone, date_range),
+      filename.("imported_pages") =>
+        export_pages_q(site_id, current_user_id, timezone, date_range),
       filename.("imported_entry_pages") => export_entry_pages_q(site_id, timezone, date_range),
       filename.("imported_exit_pages") => export_exit_pages_q(site_id, timezone, date_range),
       filename.("imported_custom_events") =>
@@ -411,13 +414,71 @@ defmodule Plausible.Exports do
     ]
   end
 
-  defp export_pages_q(site_id, timezone, date_range) do
-    from e in sampled("events_v2"),
-      where: ^export_filter(site_id, date_range),
-      where: [name: "pageview"],
-      group_by: [selected_as(:date), e.pathname],
-      order_by: selected_as(:date),
-      select: [
+  defp export_pages_q(site_id, current_user_id, timezone, date_range) do
+    site = Plausible.Repo.get(Plausible.Site, site_id)
+    current_user = current_user_id && Plausible.Repo.get(Plausible.Auth.User, current_user_id)
+
+    scroll_depth_enabled? =
+      PlausibleWeb.Api.StatsController.scroll_depth_enabled?(site, current_user)
+
+    base_q =
+      from(e in sampled("events_v2"),
+        where: ^export_filter(site_id, date_range),
+        where: [name: "pageview"],
+        group_by: [selected_as(:date), selected_as(:page)],
+        order_by: selected_as(:date)
+      )
+
+    if scroll_depth_enabled? do
+      max_scroll_depth_per_visitor_q =
+        from(e in "events_v2",
+          where: ^export_filter(site_id, date_range),
+          where: e.name == "pageleave" and e.scroll_depth <= 100,
+          select: %{
+            date: date(e.timestamp, ^timezone),
+            page: selected_as(e.pathname, :page),
+            user_id: e.user_id,
+            max_scroll_depth: max(e.scroll_depth)
+          },
+          group_by: [e.user_id, selected_as(:date), selected_as(:page)]
+        )
+
+      scroll_depth_q =
+        from(p in subquery(max_scroll_depth_per_visitor_q),
+          select: %{
+            date: p.date,
+            page: p.page,
+            scroll_depth:
+              fragment(
+                "if(isNull(sum(?)), NULL, toUInt64(sum(?)))",
+                p.max_scroll_depth,
+                p.max_scroll_depth
+              ),
+            pageleave_visitors: count(p.user_id)
+          },
+          group_by: [:date, :page]
+        )
+
+      from(e in base_q,
+        left_join: s in subquery(scroll_depth_q),
+        on: s.date == selected_as(:date) and s.page == selected_as(:page),
+        select: [
+          date(e.timestamp, ^timezone),
+          selected_as(fragment("any(?)", e.hostname), :hostname),
+          selected_as(e.pathname, :page),
+          selected_as(
+            fragment("toUInt64(round(uniq(?)*any(_sample_factor)))", e.session_id),
+            :visits
+          ),
+          visitors(e),
+          selected_as(fragment("toUInt64(round(count()*any(_sample_factor)))"), :pageviews),
+          selected_as(fragment("any(?)", s.scroll_depth), :scroll_depth),
+          selected_as(fragment("any(?)", s.pageleave_visitors), :pageleave_visitors)
+        ]
+      )
+    else
+      base_q
+      |> select([e], [
         date(e.timestamp, ^timezone),
         selected_as(fragment("any(?)", e.hostname), :hostname),
         selected_as(e.pathname, :page),
@@ -427,7 +488,8 @@
         ),
         visitors(e),
         selected_as(fragment("toUInt64(round(count()*any(_sample_factor)))"), :pageviews)
-      ]
+      ])
+    end
   end
 
   defp export_entry_pages_q(site_id, timezone, date_range) do
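For intuition: the scroll-depth branch above is a two-level aggregation. It first takes the maximum scroll depth each visitor reached per (date, page), then keeps, per group, the sum of those maxima plus the number of pageleave visitors; the average is only computed later, when the data is read back. A minimal plain-Elixir sketch of the same logic over hypothetical in-memory events (names and values invented):

pageleaves = [
  %{user_id: 1, date: ~D[2025-01-01], page: "/", scroll_depth: 40},
  %{user_id: 1, date: ~D[2025-01-01], page: "/", scroll_depth: 90},
  %{user_id: 2, date: ~D[2025-01-01], page: "/", scroll_depth: 60}
]

pageleaves
# max scroll depth per visitor per (date, page), as in max_scroll_depth_per_visitor_q
|> Enum.group_by(&{&1.user_id, &1.date, &1.page}, & &1.scroll_depth)
|> Enum.map(fn {{_user, date, page}, depths} -> {date, page, Enum.max(depths)} end)
# then sum of maxima + visitor count per (date, page), as in scroll_depth_q
|> Enum.group_by(fn {date, page, _max} -> {date, page} end, fn {_, _, max_depth} -> max_depth end)
|> Enum.map(fn {{date, page}, maxes} ->
  %{date: date, page: page, scroll_depth: Enum.sum(maxes), pageleave_visitors: length(maxes)}
end)
#=> [%{date: ~D[2025-01-01], page: "/", scroll_depth: 150, pageleave_visitors: 2}]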
2 changes: 1 addition & 1 deletion lib/plausible/imported/csv_importer.ex
@@ -161,7 +161,7 @@ defmodule Plausible.Imported.CSVImporter do
       "imported_operating_systems" =>
         "date Date, operating_system String, operating_system_version String, visitors UInt64, visits UInt64, visit_duration UInt64, bounces UInt32, pageviews UInt64",
       "imported_pages" =>
-        "date Date, hostname String, page String, visits UInt64, visitors UInt64, pageviews UInt64",
+        "date Date, hostname String, page String, visits UInt64, visitors UInt64, pageviews UInt64, scroll_depth Nullable(UInt64), pageleave_visitors UInt64",
       "imported_sources" =>
         "date Date, source String, referrer String, utm_source String, utm_medium String, utm_campaign String, utm_content String, utm_term String, pageviews UInt64, visitors UInt64, visits UInt64, visit_duration UInt64, bounces UInt32",
       "imported_visitors" =>
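Note that scroll_depth in this schema stores the sum of per-visitor scroll-depth maxima for the row, not a ready-made 0-100 average; together with pageleave_visitors it lets averages be recombined later. A hypothetical imported_pages CSV under the extended schema (header line and values invented for illustration; the empty field is NULL, e.g. for data exported before pageleave tracking existed):

date,hostname,page,visits,visitors,pageviews,scroll_depth,pageleave_visitors
2025-01-01,example.com,/blog,10,8,14,342,6
2025-01-01,example.com,/pricing,4,4,5,,0

Here /blog would later surface an average scroll depth of round(342 / 6) = 57.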
2 changes: 2 additions & 0 deletions lib/plausible/imported/page.ex
@@ -15,5 +15,7 @@ defmodule Plausible.Imported.Page do
     field :pageviews, Ch, type: "UInt64"
     field :exits, Ch, type: "UInt64"
     field :time_on_page, Ch, type: "UInt64"
+    field :scroll_depth, Ch, type: "Nullable(UInt64)"
+    field :pageleave_visitors, Ch, type: "UInt64"
   end
 end
1 change: 1 addition & 0 deletions lib/plausible/stats/imported/imported.ex
@@ -345,6 +345,7 @@ defmodule Plausible.Stats.Imported do
 
   defp can_order_by?(query) do
     Enum.all?(query.order_by, fn
+      {:scroll_depth, _} -> false
       {metric, _direction} when is_atom(metric) -> metric in query.metrics
       _ -> true
     end)
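The new clause means any order_by on scroll_depth disables ordering in the imported-data query path, since the final scroll_depth value is only computed later (in SpecialMetrics.add/3, see below) and is not available to sort on at this stage. A sketch of the predicate on hypothetical query structs (the function is private; the calls are illustrative):

can_order_by?(%{order_by: [{:scroll_depth, :desc}], metrics: [:scroll_depth]})
#=> false

can_order_by?(%{order_by: [{:visitors, :desc}], metrics: [:visitors]})
#=> true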
20 changes: 20 additions & 0 deletions lib/plausible/stats/imported/sql/expression.ex
@@ -115,6 +115,13 @@ defmodule Plausible.Stats.Imported.SQL.Expression do
     wrap_alias([i], %{pageviews: sum(i.pageviews), __internal_visits: sum(i.visits)})
   end
 
+  defp select_metric(:scroll_depth, "imported_pages") do
+    wrap_alias([i], %{
+      scroll_depth_sum: sum(i.scroll_depth),
+      pageleave_visitors: sum(i.pageleave_visitors)
+    })
+  end
+
   defp select_metric(_metric, _table), do: %{}
 
   def group_imported_by(q, query) do
@@ -351,6 +358,19 @@ defmodule Plausible.Stats.Imported.SQL.Expression do
     |> select_joined_metrics(rest)
   end
 
+  # The final `scroll_depth` gets selected at a later querybuilding step
+  # (in `Plausible.Stats.SQL.SpecialMetrics.add/3`). But in order to avoid
+  # having to join with imported data there again, we select the required
+  # information from imported data here already.
+  def select_joined_metrics(q, [:scroll_depth | rest]) do
+    q
+    |> select_merge_as([s, i], %{
+      __internal_scroll_depth_sum: i.scroll_depth_sum,
+      __internal_pageleave_visitors: i.pageleave_visitors
+    })
+    |> select_joined_metrics(rest)
+  end
+
   # Ignored as it's calculated separately
   def select_joined_metrics(q, [metric | rest])
       when metric in [:conversion_rate, :group_conversion_rate, :percentage] do
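Why the query carries scroll_depth_sum and pageleave_visitors forward instead of a finished average: averages from two sources cannot be merged without their weights. A quick illustration with invented numbers:

# native:   sum 150 over 2 pageleave visitors  -> avg 75
# imported: sum  60 over 3 pageleave visitors  -> avg 20

(75 + 20) / 2
#=> 47.5  (wrong: unweighted average of averages)

(150 + 60) / (2 + 3)
#=> 42.0  (right: combine sums and counts first, divide last)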
63 changes: 53 additions & 10 deletions lib/plausible/stats/sql/special_metrics.ex
@@ -150,15 +150,12 @@ defmodule Plausible.Stats.SQL.SpecialMetrics do
       dim_shortnames
       |> Enum.map(fn dim -> dynamic([p], field(p, ^dim)) end)
 
-      scroll_depth_q =
+      scroll_depth_sum_q =
         subquery(max_per_visitor_q)
         |> select([p], %{
-          scroll_depth:
-            fragment(
-              "if(isFinite(avg(?)), toUInt8(round(avg(?))), NULL)",
-              p.max_scroll_depth,
-              p.max_scroll_depth
-            )
+          scroll_depth_sum:
+            fragment("if(count(?) = 0, NULL, sum(?))", p.user_id, p.max_scroll_depth),
+          pageleave_visitors: fragment("count(?)", p.user_id)
         })
         |> select_merge(^dim_select)
         |> group_by(^dim_group_by)
@@ -173,9 +170,55 @@
         |> Enum.reduce(fn condition, acc -> dynamic([], ^acc and ^condition) end)
       end
 
-      q
-      |> join(:left, [e], s in subquery(scroll_depth_q), on: ^join_on_dim_condition)
-      |> select_merge_as([_e, ..., s], %{scroll_depth: fragment("any(?)", s.scroll_depth)})
+      joined_q =
+        join(q, :left, [e], s in subquery(scroll_depth_sum_q), on: ^join_on_dim_condition)
+
+      if query.include_imported do
+        joined_q
+        |> select_merge_as([..., s], %{
+          scroll_depth:
+            fragment(
+              """
+              case
+                when isNotNull(?) AND isNotNull(?) then
+                  toUInt8(round((? + ?) / (? + ?)))
+                when isNotNull(?) then
+                  toUInt8(round(? / ?))
+                when isNotNull(?) then
+                  toUInt8(round(? / ?))
+                else
+                  NULL
+              end
+              """,
+              # Case 1: Both imported and native scroll depth sums are present
+              selected_as(:__internal_scroll_depth_sum),
+              s.scroll_depth_sum,
+              selected_as(:__internal_scroll_depth_sum),
+              s.scroll_depth_sum,
+              selected_as(:__internal_pageleave_visitors),
+              s.pageleave_visitors,
+              # Case 2: Only imported scroll depth sum is present
+              selected_as(:__internal_scroll_depth_sum),
+              selected_as(:__internal_scroll_depth_sum),
+              selected_as(:__internal_pageleave_visitors),
+              # Case 3: Only native scroll depth sum is present
+              s.scroll_depth_sum,
+              s.scroll_depth_sum,
+              s.pageleave_visitors
+            )
+        })
+      else
+        joined_q
+        |> select_merge_as([..., s], %{
+          scroll_depth:
+            fragment(
+              "if(any(?) > 0, toUInt8(round(any(?) / any(?))), NULL)",
+              s.pageleave_visitors,
+              s.scroll_depth_sum,
+              s.pageleave_visitors
+            )
+        })
+      end
     else
       q
     end
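Restated outside SQL, the three-branch CASE above behaves roughly like this Elixir cond (a sketch; the names are illustrative and nil stands in for SQL NULL):

combined_scroll_depth = fn imported_sum, imported_visitors, native_sum, native_visitors ->
  cond do
    # both sources present: weighted average over all pageleave visitors
    imported_sum != nil and native_sum != nil ->
      round((imported_sum + native_sum) / (imported_visitors + native_visitors))

    # only imported data
    imported_sum != nil -> round(imported_sum / imported_visitors)

    # only native data
    native_sum != nil -> round(native_sum / native_visitors)

    true -> nil
  end
end

combined_scroll_depth.(60, 3, 150, 2)
#=> 42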
9 changes: 5 additions & 4 deletions lib/plausible_web/controllers/api/stats_controller.ex
@@ -393,15 +393,16 @@ defmodule PlausibleWeb.Api.StatsController do
 
   defp fetch_other_top_stats(site, query, current_user) do
     page_filter? = Filters.filtering_on_dimension?(query, "event:page")
+    scroll_depth_enabled? = scroll_depth_enabled?(site, current_user)
 
     metrics = [:visitors, :visits, :pageviews, :sample_percent]
 
     metrics =
       cond do
-        page_filter? && query.include_imported ->
-          metrics
+        page_filter? && scroll_depth_enabled? && query.include_imported ->
+          metrics ++ [:scroll_depth]
 
-        page_filter? && scroll_depth_enabled?(site, current_user) ->
+        page_filter? && scroll_depth_enabled? ->
           metrics ++ [:bounce_rate, :scroll_depth, :time_on_page]
 
         page_filter? ->
@@ -831,7 +832,7 @@
     params = Map.put(params, "property", "event:page")
     query = Query.from(site, params, debug_metadata(conn))
 
-    include_scroll_depth? = !query.include_imported && scroll_depth_enabled?(site, current_user)
+    include_scroll_depth? = scroll_depth_enabled?(site, current_user)
 
     extra_metrics =
       cond do
5 changes: 3 additions & 2 deletions lib/plausible_web/live/csv_export.ex
@@ -210,11 +210,12 @@ defmodule PlausibleWeb.Live.CSVExport do
 
   @impl true
   def handle_event("export", _params, socket) do
-    %{storage: storage, site_id: site_id, email_to: email_to} = socket.assigns
+    %{storage: storage, site_id: site_id, email_to: email_to, current_user: current_user} =
+      socket.assigns
 
     schedule_result =
       case storage do
-        "s3" -> Exports.schedule_s3_export(site_id, email_to)
+        "s3" -> Exports.schedule_s3_export(site_id, current_user.id, email_to)
         "local" -> Exports.schedule_local_export(site_id, email_to)
       end
 
4 changes: 3 additions & 1 deletion lib/workers/export_analytics.ex
@@ -29,11 +29,13 @@ defmodule Plausible.Workers.ExportAnalytics do
       "site_id" => site_id
     } = args
 
+    current_user_id = args["current_user_id"]
+
    site = Plausible.Repo.get!(Plausible.Site, site_id)
    %Date.Range{} = date_range = Exports.date_range(site.id, site.timezone)
 
     queries =
-      Exports.export_queries(site_id,
+      Exports.export_queries(site_id, current_user_id,
         date_range: date_range,
         timezone: site.timezone,
         extname: ".csv"
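The worker reads the id with args["current_user_id"] rather than pattern-matching on the key, so jobs enqueued before this change (which lack it) still run: the lookup simply returns nil, and everything downstream accepts pos_integer | nil. A hypothetical args map as the worker might now receive it (values invented for illustration):

%{
  "storage" => "s3",
  "site_id" => 123,
  "email_to" => "owner@example.com",
  "current_user_id" => 42,
  # remaining keys as built by schedule_s3_export/3; values illustrative
  "s3_bucket" => "exports",
  "s3_path" => "..."
}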
9 changes: 3 additions & 6 deletions test/plausible/exports_test.exs
@@ -7,7 +7,7 @@ defmodule Plausible.ExportsTest do
 
   describe "export_queries/2" do
     test "returns named ecto queries" do
-      queries = Plausible.Exports.export_queries(_site_id = 1)
+      queries = Plausible.Exports.export_queries(_site_id = 1, nil)
       assert queries |> Map.values() |> Enum.all?(&match?(%Ecto.Query{}, &1))
 
       assert Map.keys(queries) == [
@@ -26,7 +26,7 @@
 
     test "with date range" do
       queries =
-        Plausible.Exports.export_queries(_site_id = 1,
+        Plausible.Exports.export_queries(_site_id = 1, nil,
          date_range: Date.range(~D[2023-01-01], ~D[2024-03-12])
        )
 
@@ -45,10 +45,7 @@
     end
 
     test "with custom extension" do
-      queries =
-        Plausible.Exports.export_queries(_site_id = 1,
-          extname: ".ch"
-        )
+      queries = Plausible.Exports.export_queries(_site_id = 1, nil, extname: ".ch")
 
       assert Map.keys(queries) == [
         "imported_browsers.ch",