Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SFR-2188: Removed Metrics_Type Column + Updated File Names #400

Merged
merged 2 commits into from
Oct 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions analytics/upress_reporting/counter_5_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,12 +68,12 @@ def create_reports(self):

view_data_poller = InteractionEventPoller(date_range=self.reporting_period,
reporting_data=df,
file_id_regex=r"REST.GET.OBJECT manifests/(.*?json)\s",
file_id_regex=VIEW_FILE_ID_REGEX,
bucket_name=self.view_bucket,
interaction_type=InteractionType.VIEW)
download_data_poller = InteractionEventPoller(date_range=self.reporting_period,
reporting_data=df,
file_id_regex=r"REST.GET.OBJECT (.+pdf\s)",
file_id_regex=DOWNLOAD_FILE_ID_REGEX,
bucket_name=self.download_bucket,
interaction_type=InteractionType.DOWNLOAD)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,4 @@ class InteractionEvent():
publication_year: Optional[str]
disciplines: Optional[str]
usage_type: str
interaction_type: Optional[str]
timestamp: Optional[str]
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,6 @@ def _match_log_info_with_drb_data(self, log_object) -> InteractionEvent | None:
publication_year=match_data["publication_year"],
disciplines=match_data["disciplines"],
usage_type=match_data["usage_type"],
interaction_type=self.interaction_type.value,
timestamp=match_time[0]
)

Expand Down
19 changes: 9 additions & 10 deletions analytics/upress_reporting/models/reports/counter_5_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,9 @@ def aggregate_interaction_events(self, events, reporting_data):
"Publication Year",
"Disciplines",
"Usage Type",
"Metric Type",
"Timestamp"
]

interaction_type = events[0].interaction_type
accessed_titles_df = self._create_events_df(events, columns)
accessed_titles_df["Timestamp"] = accessed_titles_df["Timestamp"].apply(
self._reformat_timestamp_data)
Expand All @@ -64,7 +62,7 @@ def aggregate_interaction_events(self, events, reporting_data):

zeroed_out_titles_df = self._format_zeroed_out_titles(
df=reporting_data, columns=columns,
monthly_columns=monthly_columns, interaction_type=interaction_type)
monthly_columns=monthly_columns)

merged_df = pandas.concat(
[accessed_titles_df, zeroed_out_titles_df], ignore_index=True)
Expand All @@ -87,11 +85,9 @@ def aggregate_interaction_events_by_country(self, events, reporting_data):
"Publication Year",
"Disciplines",
"Usage Type",
"Metric Type",
"Timestamp"
]

interaction_type = events[0].interaction_type
accessed_titles_df = self._create_events_df(events=events,
columns=columns,
include_country=True)
Expand All @@ -117,7 +113,7 @@ def aggregate_interaction_events_by_country(self, events, reporting_data):

zeroed_out_titles_df = self._format_zeroed_out_titles(
df=reporting_data, columns=columns,
monthly_columns=monthly_columns, interaction_type=interaction_type,
monthly_columns=monthly_columns,
include_country=True)

accessed_titles_df.loc[:,
Expand All @@ -133,7 +129,7 @@ def aggregate_interaction_events_by_country(self, events, reporting_data):

return (merged_df.columns.tolist(), merged_df.to_dict(orient="records"))

def build_header(self, report_name, report_description):
def build_header(self, report_name, report_description, metric_type):
"""TODO: Add further Record.source mappings to publishers as we advance
in project (ex. University of Louisiana, Lafayette)"""
publisher_mappings = {
Expand All @@ -144,13 +140,17 @@ def build_header(self, report_name, report_description):
"Report_ID": self.generate_report_id(),
"Report_Description": report_description,
"Publisher_Name": publisher_mappings.get(self.publisher, ""),
"Metric_Type": metric_type,
"Reporting_Period": self._format_reporting_period_to_string(),
"Created": self.created,
"Created_By": "NYPL",
}

def write_to_csv(self, file_name, header, column_names, data):
with open(file_name, 'w') as csv_file:
if "/" in file_name:
file_name = file_name.replace("/ ", "(") + ")"

with open(file_name+".csv", 'w') as csv_file:
writer = csv.writer(csv_file, delimiter="|",
quoting=csv.QUOTE_NONE)
for key, value in header.items():
Expand All @@ -161,7 +161,7 @@ def write_to_csv(self, file_name, header, column_names, data):
writer.writerow(title.values())

def _format_zeroed_out_titles(self, df, columns, monthly_columns,
interaction_type, include_country=False):
include_country=False):
unaccessed_titles = df.loc[df["accessed"] == False]
recarray = unaccessed_titles.to_records()

Expand All @@ -175,7 +175,6 @@ def _format_zeroed_out_titles(self, df, columns, monthly_columns,
publication_year=title.publication_year,
disciplines=title.disciplines,
usage_type=title.usage_type,
interaction_type=interaction_type,
timestamp=None) for title in recarray]

zeroed_out_df = self._create_events_df(zeroed_out_events, columns,
Expand Down
6 changes: 3 additions & 3 deletions analytics/upress_reporting/models/reports/country_level.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@ def build_report(self, events, reporting_data):
print("Building country-level report...")

if len(events) > 0:
file_name = f"{self.publisher}_country_level_report_{self.created}.csv"
header = self.build_header(report_name="NYPL DRB Total Item Requests by Title by Country",
report_description="Usage of your books on NYPL's Digital Research Books by country.")
report_description="Usage of your books on NYPL's Digital Research Books by country.",
metric_type="Views + Downloads")
columns, final_data = self.aggregate_interaction_events_by_country(events,
reporting_data)

self.write_to_csv(file_name=file_name,
self.write_to_csv(file_name=header["Report_Name"],
header=header,
column_names=columns,
data=final_data)
Expand Down
6 changes: 3 additions & 3 deletions analytics/upress_reporting/models/reports/downloads.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@ def build_report(self, events, reporting_data):
print("Building downloads report...")

if len(events) > 0:
file_name = f"{self.publisher}_downloads_report_{self.created}.csv"
header = self.build_header(report_name="NYPL DRB Total Item Requests by Title / Downloads",
report_description="Downloads of your books from NYPL's Digital Research Books by title.")
report_description="Downloads of your books from NYPL's Digital Research Books by title.",
metric_type="Downloads (loading of title contents)")
columns, final_data = self.aggregate_interaction_events(events, reporting_data)

self.write_to_csv(file_name=file_name,
self.write_to_csv(file_name=header["Report_Name"],
header=header,
column_names=columns,
data=final_data)
Expand Down
6 changes: 3 additions & 3 deletions analytics/upress_reporting/models/reports/total_usage.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@ def build_report(self, events, reporting_data):
print("Building total usage report...")

if len(events) > 0:
file_name = f"{self.publisher}_total_usage_{self.created}.csv"
header = self.build_header(report_name="NYPL DRB Total Item Requests by Title",
report_description="Usage of your books on NYPL's Digital Research Books.")
report_description="Usage of your books on NYPL's Digital Research Books.",
metric_type="Views (clicks on title) + Downloads (loading of title contents)")
columns, final_data = self.aggregate_interaction_events(events, reporting_data)

self.write_to_csv(file_name=file_name,
self.write_to_csv(file_name=header["Report_Name"],
header=header,
column_names=columns,
data=final_data)
Expand Down
6 changes: 3 additions & 3 deletions analytics/upress_reporting/models/reports/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@ def build_report(self, events, reporting_data):
print("Building views report...")

if len(events) > 0:
file_name = f"{self.publisher}_views_report_{self.created}.csv"
header = self.build_header(report_name="NYPL DRB Total Item Requests by Title / Views",
report_description="Views of your books from NYPL's Digital Research Books by title.")
report_description="Views of your books from NYPL's Digital Research Books by title.",
metric_type="Views (clicks on title)")
columns, final_data = self.aggregate_interaction_events(events, reporting_data)

self.write_to_csv(file_name=file_name,
self.write_to_csv(file_name=header["Report_Name"],
header=header,
column_names=columns,
data=final_data)
Expand Down
Loading