Skip to content

Commit

Permalink
Allow different filters for sponsor/site in sankey
Browse files (browse the repository at this point in the history)
The sponsor filter and the site filter can each be manual, ror, or country.
Pass the correct country column through (configurable separately for sponsor and site).
  • Loading branch information
ccunningham101 committed Dec 5, 2023
1 parent 3af79ac commit 81ec785
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 9 deletions.
39 changes: 34 additions & 5 deletions query_ror.py
Original file line number Diff line number Diff line change
Expand Up @@ -528,17 +528,17 @@ def site_sponsor(args):
for index, name in enumerate(counts.sponsor_who_region.unique())
}
link = dict(
source=list(counts.who_region.map(who_map)),
target=list(counts.sponsor_who_region.map(who_sponsor_map)),
source=list(counts.sponsor_who_region.map(who_sponsor_map)),
target=list(counts.who_region.map(who_map)),
value=list(counts.trial_id),
)
data = go.Sankey(
link=link, node=dict(label=list(who_map.keys()) + list(who_sponsor_map.keys()))
)
fig = go.Figure(data)
sources = sorted(set(merged.source_x).intersection(set(merged.source_y)))
sources = sorted(set(merged.source))
fig.update_layout(
title=f"Mapping Trials Sites Country to Sponsor Country by WHO Region (data from: {' '.join(sources)})",
title=f"Mapping Sponsor to Trial Site by WHO Region (data from: {' '.join(sources)})",
)
fig.write_html("sankey.html")

Expand Down Expand Up @@ -725,6 +725,12 @@ def multisite(args):
map_parser.set_defaults(func=make_map)

org_parser = subparsers.add_parser("sponsor-org", parents=[results])
org_parser.add_argument(
"--country-column",
type=str,
help="Name of country column to use",
default="country",
)
org_parser.set_defaults(func=org_region)

flowchart_parser = subparsers.add_parser("flowchart", parents=[results])
Expand All @@ -750,11 +756,34 @@ def multisite(args):
help="One or more glob patterns for matching input files",
)
site_sponsor_parser.add_argument(
"--file-filter",
"--sponsor-filter",
choices=["manual", "ror", "country"],
default="country",
help="Filter registry data",
)
site_sponsor_parser.add_argument(
"--site-filter",
choices=["manual", "ror", "country"],
default="country",
help="Filter registry data",
)
site_sponsor_parser.add_argument(
"--sponsor-country-column",
type=str,
help="Name of sponsor country column to use",
default="country",
)
site_sponsor_parser.add_argument(
"--site-country-column",
type=str,
help="Name of site country column to use",
default="country",
)
site_sponsor_parser.add_argument(
"--exclude-indiv-company",
action="store_true",
help="Exclude individuals and companies",
)
site_sponsor_parser.set_defaults(func=site_sponsor)

args = ror_parser.parse_args()
Expand Down
9 changes: 5 additions & 4 deletions utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,8 @@ def load_glob(filenames, file_filter, exclude_indiv_company=False):
if "country" not in df.columns:
logging.info(f"Skipping {input_file}: has no country data")
continue

# One entry per trial/source/country
df = df.groupby(["trial_id", "source", "country"]).first().reset_index()
# TODO: do we need to merge so they have the same columns? Fillna
logging.info(f"Adding {input_file}")
frames.append(df)
Expand Down Expand Up @@ -842,17 +843,17 @@ def region_map(counts, country_column="country", legend_title="Number of Trials"
legend=True,
legend_kwds={"label": f"{legend_title}"},
)
ax.set_title(f"{region_name} Trial Sites")
ax.set_title(f"{region_name}")
ax.set_xticklabels([])
ax.set_yticklabels([])


def region_pie(df, legend_title="Number of Trials"):
def region_pie(df, country_column, legend_title="Number of Trials"):
"""
Counts is a series indexed by iso2 country
"""
# TODO: which country- country_ror?
df["who_region"] = map_who(df["country"])
df["who_region"] = map_who(df[country_column])
grouped = df.groupby("who_region")

orgs = df.organization_type.unique()
Expand Down

0 comments on commit 81ec785

Please sign in to comment.