Skip to content

Commit

Permalink
Wordcloud integration (#43)
Browse files Browse the repository at this point in the history
* wordcloud integration

* linting

* lint

* lint
  • Loading branch information
paulzierep authored Jan 24, 2024
1 parent 60eb8d1 commit facddc3
Show file tree
Hide file tree
Showing 5 changed files with 101 additions and 2 deletions.
83 changes: 83 additions & 0 deletions bin/create_wordcloud.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
#!/usr/bin/env python

import argparse

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from PIL import Image
from wordcloud import WordCloud


def get_wordcloud(community_tool_path: str, mask_figure: str, stats_column: str, wordcloud_output_path: str) -> None:
    """
    Generate a wordcloud based on the counts for each Galaxy wrapper id.

    The table is read as TSV, every "Galaxy wrapper id" is weighted by its
    value in `stats_column`, and the rendered cloud is saved as a figure.

    :param community_tool_path: Path to a TSV table that must
        have the columns "Galaxy wrapper id" and `stats_column`
    :param mask_figure: Path to a figure that is used to shape the wordcloud,
        e.g. a nice shape to highlight your community. May be None or empty,
        in which case the wordcloud is rendered without a mask.
    :param stats_column: Column name of the
        column with usage statistics in the table
    :param wordcloud_output_path: Path to store the wordcloud
    :raises ValueError: if `stats_column` is not a column of the table
    """
    community_tool_stats = pd.read_csv(community_tool_path, sep="\t")

    # Explicit raise instead of `assert`: asserts are stripped under `python -O`,
    # which would silently skip this validation.
    if stats_column not in community_tool_stats:
        raise ValueError(f"Stats column: {stats_column} not found in table!")

    # Rows without a usage value or without a wrapper id cannot be drawn
    # (a NaN weight has no font size), so they are skipped.
    usable_rows = community_tool_stats.dropna(subset=["Galaxy wrapper id", stats_column])
    frec = pd.Series(usable_rows[stats_column].values, index=usable_rows["Galaxy wrapper id"]).to_dict()

    # The mask is optional on the command line; without one WordCloud falls
    # back to its default rectangular canvas.
    mask = None
    if mask_figure:
        # Context manager makes sure the image file handle is released.
        with Image.open(mask_figure) as mask_image:
            mask = np.array(mask_image)
        mask[mask == 0] = 255  # set 0 in array to 255 to work with wordcloud

    wc = WordCloud(
        mask=mask,
        background_color="rgba(255, 255, 255, 0)",
        random_state=42,  # fixed seed keeps the layout reproducible between runs
    )
    wc.generate_from_frequencies(frec)

    fig, ax = plt.subplots(figsize=(13, 5))
    try:
        ax.imshow(wc)
        ax.axis("off")
        fig.tight_layout(pad=0)
        fig.savefig(wordcloud_output_path)
    finally:
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)


# Command-line entry point: parse the arguments and render the wordcloud.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        # Implicit string concatenation avoids embedding the source indentation
        # in the description, which a backslash continuation inside the literal does.
        description="Create wordcloud from " "TSV file based on Galaxy usage statistics"
    )
    parser.add_argument(
        "--table",
        "-ta",
        required=True,
        help="Path to TSV file with tools and stats",
    )
    parser.add_argument(
        "--stats_column",
        "-sc",
        required=True,
        help="Name of the column with usage statistics",
    )
    parser.add_argument(
        "--output",
        "-out",
        required=True,
        # The script saves a rendered figure, not an HTML page.
        help="Path to the wordcloud image output (e.g. a PNG file)",
    )
    parser.add_argument(
        "--wordcloud_mask",
        "-wcm",
        required=False,
        help="Optional mask figure used to shape the wordcloud",
    )

    args = parser.parse_args()
    get_wordcloud(args.table, args.wordcloud_mask, args.stats_column, args.output)
8 changes: 7 additions & 1 deletion bin/extract_all_tools_downstream.sh
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,10 @@ mkdir -p 'results/'
python bin/create_interactive_table.py \
--table "results/all_tools.tsv" \
--template "data/interactive_table_template.html" \
--output "results/index.html"
--output "results/index.html"

python bin/create_wordcloud.py \
--table "results/all_tools.tsv" \
--wordcloud_mask "data/usage_stats/wordcloud_mask.png" \
--output "results/all_tools_wordcloud.png" \
--stats_column "https://usegalaxy.eu usage"
6 changes: 6 additions & 0 deletions bin/get_community_tools.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,5 +35,11 @@ for com_data_fp in data/communities/* ; do
--template "data/interactive_table_template.html" \
--output "results/$community/index.html"

python bin/create_wordcloud.py \
--table "results/$community/tools.tsv" \
--wordcloud_mask "data/usage_stats/wordcloud_mask.png" \
--output "results/$community/tools_wordcloud.png" \
--stats_column "https://usegalaxy.eu usage"

fi;
done
Binary file added data/usage_stats/wordcloud_mask.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
6 changes: 5 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
pandas
PyGithub
pyyaml
pyyaml
numpy
Pillow
matplotlib
wordcloud

0 comments on commit facddc3

Please sign in to comment.