diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index cbf40f4..c75b28c 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -2,16 +2,15 @@ name: main on: [push] jobs: - conda-env: + test: + strategy: + matrix: + python-version: ["3.8", "3.9", "3.10"] runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - + - uses: actions/checkout@v4 - name: git setup # Set up git and export env vars to be used in later steps. - # Note the unconventional mechanism for exporting envs by appending to - # $GITHUB_ENV. id: git-setup run: | git config --global user.email "action@github.com" @@ -19,54 +18,41 @@ jobs: echo "BRANCH=${GITHUB_REF##*/}" >> $GITHUB_ENV echo "WORKDIR=$(pwd)" >> $GITHUB_ENV - - - name: load cached conda env - # Load cached env. - # This looks for a cache based on the hash of requirements.txt and - # test-requirements.txt; if it's found the cached directory is - # restored. There's an extra "v1" on the end so it can be forced to be - # regenerated when needed. - id: cache-env - uses: actions/cache@v2 - with: - path: /tmp/test-env - key: ${{ hashFiles('requirements.txt') }}-${{ hashFiles('test-requirements.txt') }}-v1 - - - name: build new conda env - # Build cache if needed. - # Only runs if there was a cache miss. If this is created. there's - # a "Post load cached env" job (which is automatically created, it's - # not defined here) that will load this into the cache for use next - # time. - if: steps.cache-env.outputs.cache-hit != 'true' run: | eval "$(conda shell.bash hook)" - conda create -p /tmp/test-env -y --file requirements.txt --file test-requirements.txt --channel conda-forge --channel bioconda - + conda install -n base mamba -y --channel conda-forge + mamba create -p ./env -y python=${{ matrix.python-version }} --file test-requirements.txt --channel conda-forge --channel bioconda + conda activate ./env + pip install -e . - - name: run pytests and build docs + - name: run pytests # pytests and doctests happen here run: | eval "$(conda shell.bash hook)" - source activate /tmp/test-env - python setup.py install + conda activate ./env pytest -vv --doctest-modules trackhub + - name: build docs + run: | # To make sure that the links in the built docs refer to the correct # branch on the trackhub-demo repo, we replace the branch names in the # .rst files. Note if we're on master branch then this is a no-op. - cd doc - find . -name "*.rst" | xargs sed -i "s|trackhub-demo/master|trackhub-demo/$BRANCH|g" - make doctest html - conda deactivate + eval "$(conda shell.bash hook)" + conda activate ./env + conda env export + ( + cd doc + find . -name "*.rst" | xargs sed -i "s|trackhub-demo/master|trackhub-demo/$BRANCH|g" + make doctest html + ) - name: test command-line script # Ensure that the installed command-line script can be run and creates # the correct files run: | eval "$(conda shell.bash hook)" - source activate /tmp/test-env + conda activate ./env trackhub_from_excel --template trackhub_from_excel --create-example a.xlsx trackhub_from_excel --excel-file a.xlsx @@ -77,32 +63,26 @@ jobs: err=1 fi done - if [ err == 1 ]; then + if [ $err == 1 ]; then exit 1 fi - name: upload the just-built docs as an artifact - # The built docs will be uploaded as a zip file (called docs.zip). - # This file will be available on the Actions page and can be used to - # inspect the final rendered docs. This is useful when building on - # a branch and for contributors to make corrections to the docs without - # needing to set everything up locally. 
- uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: - name: docs + name: docs-${{ matrix.python-version }}-${{ github.run_id }} path: doc/build/html + overwrite: true # v4 requires unique names; this deletes any existing artifact of the same name before uploading - - name: commit built docs to gh-pages branch - # Commit to the gh-pages branch. - # Note that this step is not restricted to the master branch, which - # lets us better test the process. The changes aren't actually pushed - # though unless we're on the master branch (see next step). + - name: commit built docs to gh-pages + # The changes aren't actually pushed unless we're on the master branch + # (see next step). + # - # Note that cloning just the gh-pages branch to a new directory ended - # up being easier than staying in this directory and copying stuff - # around within it. + # Note that cloning just the gh-pages branch to a new directory ended + # up being easier than staying in this directory and copying stuff + # around within it. run: | git clone \ --single-branch \ @@ -145,7 +125,7 @@ jobs: git branch git checkout $BRANCH eval "$(conda shell.bash hook)" - source activate /tmp/test-env + conda activate ./env ci/build_examples.py @@ -192,7 +172,7 @@ jobs: # It's possible that the $BRANCH here in the trackhub repo does not yet # exist over in the trackhub-demo repo. In that case, we should be good - # to push. + # to push immediately. if [[ -z $(git ls-remote --heads origin $BRANCH) ]]; then echo "remote branch $BRANCH does not exist" git commit -m 'update hub' @@ -200,7 +180,7 @@ jobs: exit 0 fi - # Otherwise, only push if there are changes. + # Otherwise, only push to existing branch if there are changes. if git diff origin/$BRANCH --quiet; then echo "no changes to push to branch $BRANCH!"; else @@ -219,7 +199,7 @@ jobs: run: | git checkout $BRANCH eval "$(conda shell.bash hook)" - source activate /tmp/test-env + conda activate ./env ci/check_hubs.py @@ -231,13 +211,3 @@ jobs: run: | ssh-add -D rm -Rf * - - - pip-install: - # Separate, parallel job for testing pip installation - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - run: python setup.py sdist - - run: pip install dist/*.tar.gz - - run: python -c 'import trackhub' diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000..97cc8bb --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,15 @@ +on: workflow_dispatch +jobs: + publish-testpypi: + name: Upload release to Test PyPI + runs-on: ubuntu-latest + environment: + name: testpypi + url: https://test.pypi.org/p/trackhub + permissions: + id-token: write + steps: + - name: Publish to Test PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + repository-url: https://test.pypi.org/legacy/ diff --git a/MANIFEST.in b/MANIFEST.in index 3f09cd9..f15ede4 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,3 +1,4 @@ include ez_setup.py include *.rst include *.txt +recursive-include trackhub/test/data * diff --git a/ci/check_hubs.py b/ci/check_hubs.py index 7b0435c..7534361 100755 --- a/ci/check_hubs.py +++ b/ci/check_hubs.py @@ -35,7 +35,7 @@ for line in open(args.tsv): line = line.strip() - if line.startswith('#') or not line: + if line.startswith("#") or not line: continue # Dest in trackhub demo is the second column of TSV, without the initial directory @@ -59,7 +59,15 @@ # with "can't find database hg19 in hg.conf, should have a default # named "db"". When checking the hub in the Genome Browser it seems # fine. So allowing this as a "pass".
- if line.startswith("can't find database") and "hic_hub" in str(dest): + # + # Some time later... + # hubCheck for those same tracks is now showing a "can't find profile + # central in hg.conf" error. Again the tracks look fine in the browser, + # so we'll consider that OK as well. + if "hic_hub" in str(dest) and ( + line.startswith("can't find database") + or line.startswith("can't find profile central in hg.conf") + ): warnings = True elif not (line.startswith("Found") or line.startswith("warning:")): error = True diff --git a/doc/source/changelog.rst b/doc/source/changelog.rst index fc34c5c..e5c5678 100644 --- a/doc/source/changelog.rst +++ b/doc/source/changelog.rst @@ -1,28 +1,51 @@ Changelog ========= -Version 0.3 ------------ +Version 1.0 (April 2024) +------------------------ + +This major release is backwards-compatible but adds many new features and tests. Testing infrastructure overhaul ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -See :ref:`tests` for details. +Extensive example hubs now demonstrate the various track types and +configuration options. These use data provided by UCSC and are built as part of +the testing framework. Thanks to Eva Jason (@evajason). -Documentation updates -~~~~~~~~~~~~~~~~~~~~~ +See :ref:`tests` and :ref:`tracktypes` for details. -- Examples have been added for all of the available track types. See - :ref:`tracktypes` for details. -- New section that details how validation works (see :ref:`validation`). +Build a trackhub configured in Excel +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Build a trackhub using only a command-line tool and configuration in Excel. +This new tool, ``trackhub_from_excel``, can write out a template you can use to +fill in details on what tracks should be included, how to organize them, and +configuration options like color, visibility, etc. + +Running the tool on your filled-out Excel file will build a trackhub ready to +be uploaded. + +Thanks to Eva Jason (@evajason) for this new feature. + +Semi-automated updating of validated parameters +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Dramatically improved semi-automatic updating mechanism for making sure +parameters are up-to-date with the main UCSC instance (see +``trackhub/parse.py`` and ``trackhub/parsed_params.py``). These scrape the UCSC +trackDb documentation HTML to identify parameters and write them out to a Python +module that can be diffed against the existing ``parsed_params.py`` module. In +addition to supporting validation, this highlights what has changed in the UCSC +Genome Browser since the last trackhub Python package update. Other changes ~~~~~~~~~~~~~ +- New section that details how validation works (see :ref:`validation`). + - `defaultPos` can now be set when using the `default_hub` helper function - Support for all track types supported in track hubs (see :ref:`tracktypes`) - - There is now one naming convention, `add_tracks`, rather than multiple methods names that perform this for different track types. `add_view` and `add_subtracks` can now simply be replaced with `add_tracks`. These methods @@ -30,15 +53,15 @@ Other changes two. The older method names are retained for backwards compatibility, but all documentation has been updated to reflect this change. -- dramatically improved semi-automatic updating mechanism for making sure - parameters are up-to-date with the main UCSC instance (see - ``trackhub/parse.py`` and ``trackhub/parsed_params.py``). - - fixed indentation when super tracks, composite tracks, and view tracks are nested.
- improved handling of rsync and handling symlink modification times +- Dropped support for Python 2.7 + +- Migrated tests from Travis CI to GitHub Actions and to use pytest fixtures. + Version 0.2.4 (Dec 2017) ------------------------ diff --git a/doc/source/excelinstructions.rst b/doc/source/excelinstructions.rst index 4c9425e..70c8f12 100644 --- a/doc/source/excelinstructions.rst +++ b/doc/source/excelinstructions.rst @@ -107,7 +107,7 @@ Super tracks are within the track hub and therefore do not need special fields. Example ``view_config`` sheet -```````````````````````````` +````````````````````````````` .. list-table:: :header-rows: 1 diff --git a/doc/source/index.rst b/doc/source/index.rst index cfc8b6e..faf9e29 100644 --- a/doc/source/index.rst +++ b/doc/source/index.rst @@ -13,6 +13,6 @@ Contents: assembly_example groupAutoScale tracktypes - autodocs tests + autodocs changelog diff --git a/doc/source/tests.rst b/doc/source/tests.rst new file mode 100644 index 0000000..f0a8634 --- /dev/null +++ b/doc/source/tests.rst @@ -0,0 +1,19 @@ + +.. _tests: + +Testing infrastructure +====================== +Tests are run on GitHub Actions, configured in ``.github/workflows/main.yml``. + +In addition to unit tests, any code in the documentation that shows how to +build example track hubs is now handled like this: + +- extract code from documentation (see ``ci/example_hubs.tsv`` for the list of + files from which code is extracted) +- execute code to build track hub (see ``ci/build_examples.py``) +- upload code and built track hub (and data, if relevant) to the `trackhub-demo + <https://github.com/daler/trackhub-demo>`_ repository +- run ``hubCheck`` on the just-uploaded hubs (see ``ci/check_hubs.py``) + +The just-built track hubs are then live, and linked to from within the +documentation. diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..68847bb --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,3 @@ +[build-system] +requires = ["setuptools >= 61.0"] +build-backend = "setuptools.build_meta" diff --git a/setup.py b/setup.py index 2c21b69..fe7902b 100644 --- a/setup.py +++ b/setup.py @@ -26,14 +26,17 @@ package_dir = {"trackhub": "trackhub"}, scripts=["trackhub/trackhub_from_excel"], license = 'MIT', - author_email="dalerr@niddk.nih.gov", + author_email="ryan.dale@nih.gov", classifiers=[ 'Intended Audience :: Science/Research', 'License :: OSI Approved :: GNU General Public License (GPL)', 'Topic :: Scientific/Engineering :: Bio-Informatics', 'Programming Language :: Python', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', + 'Programming Language :: Python :: 3.11', + 'Programming Language :: Python :: 3.12', 'Programming Language :: Python :: 3', 'Topic :: Software Development :: Libraries :: Python Modules', ], diff --git a/test-requirements.txt b/test-requirements.txt index e03fc99..e8e1d90 100644 --- a/test-requirements.txt +++ b/test-requirements.txt @@ -5,6 +5,7 @@ pybedtools pytest sphinx>4 pygments +sphinx_rtd_theme>=1.2.2 ucsc-bedgraphtobigwig ucsc-fatotwobit ucsc-bedtobigbed diff --git a/trackhub/__init__.py b/trackhub/__init__.py index a8f29a4..533a177 100644 --- a/trackhub/__init__.py +++ b/trackhub/__init__.py @@ -11,12 +11,21 @@ from .assembly import Assembly from .groups import GroupsFile, GroupDefinition from .trackdb import TrackDb -from .track import BaseTrack, Track, SubGroupDefinition, CompositeTrack, \ - ViewTrack,
SuperTrack, AggregateTrack +from .track import ( + BaseTrack, + Track, + SubGroupDefinition, + CompositeTrack, + ViewTrack, + SuperTrack, + AggregateTrack, +) from .version import version as __version__ -def default_hub(hub_name, genome, email, short_label=None, long_label=None, defaultPos=None): +def default_hub( + hub_name, genome, email, short_label=None, long_label=None, defaultPos=None +): """ Returns a fully-connected set of hub components using default filenames. @@ -46,15 +55,11 @@ def default_hub(hub_name, genome, email, short_label=None, long_label=None, defa if long_label is None: long_label = short_label - hub = Hub( - hub=hub_name, - short_label=short_label, - long_label=long_label, - email=email) + hub = Hub(hub=hub_name, short_label=short_label, long_label=long_label, email=email) genome_kwargs = {} if defaultPos: - genome_kwargs['defaultPos'] = defaultPos + genome_kwargs["defaultPos"] = defaultPos genome = Genome(genome, **genome_kwargs) genomes_file = GenomesFile() trackdb = TrackDb() diff --git a/trackhub/assembly.py b/trackhub/assembly.py index a3c0427..94789d6 100644 --- a/trackhub/assembly.py +++ b/trackhub/assembly.py @@ -53,7 +53,8 @@ def filename(self): return os.path.join( os.path.dirname(self.assembly.genomes_file.filename), self.assembly.genome, - self.assembly.genome + '.2bit') + self.assembly.genome + ".2bit", + ) def validate(self): if not os.path.exists(self.source): @@ -63,20 +64,22 @@ def validate(self): def filename(self, fn): self._filename = fn - def _render(self, staging='staging'): + def _render(self, staging="staging"): pass class Assembly(Genome): - def __init__(self, - genome, - twobit_file=None, - groups=None, - trackdb=None, - genome_file_obj=None, - html_string=None, - html_string_format='rst', - **kwargs): + def __init__( + self, + genome, + twobit_file=None, + groups=None, + trackdb=None, + genome_file_obj=None, + html_string=None, + html_string_format="rst", + **kwargs + ): """ Represents a genome stanza within a "genomes.txt" file for a non-UCSC genome. 
@@ -106,8 +109,8 @@ def __init__(self, self._orig_kwargs = kwargs self.track_field_order = [] - self.track_field_order.extend(constants.track_fields['assembly']) - self.track_field_order.extend(constants.track_fields['all']) + self.track_field_order.extend(constants.track_fields["assembly"]) + self.track_field_order.extend(constants.track_fields["all"]) self.add_params(**kwargs) @@ -145,8 +148,9 @@ def add_params(self, **kw): for k, v in kw.items(): if k not in self.track_field_order: raise ParameterError( - '"{0}" is not a valid parameter for {1}' - .format(k, self.__class__.__name__) + '"{0}" is not a valid parameter for {1}'.format( + k, self.__class__.__name__ + ) ) constants.param_dict[k].validate(v) @@ -181,11 +185,11 @@ def __str__(self): s = [] - s.append('genome %s' % self.genome) - s.append('trackDb %s' % self.trackdb.filename) - s.append('twoBitPath %s' % self.twobit.filename) + s.append("genome %s" % self.genome) + s.append("trackDb %s" % self.trackdb.filename) + s.append("twoBitPath %s" % self.twobit.filename) if self.groups is not None: - s.append('groups %s' % self.groups.filename) + s.append("groups %s" % self.groups.filename) for name in self.track_field_order: value = self.kwargs.pop(name, None) @@ -194,10 +198,10 @@ def __str__(self): s.append("%s %s" % (name, value)) if self._html is not None: - s.append('htmlDocumentation %s' % self._html.filename) + s.append("htmlDocumentation %s" % self._html.filename) self.kwargs = self._orig_kwargs.copy() - return '\n'.join(s) + '\n' + return "\n".join(s) + "\n" def validate(self): Genome.validate(self) @@ -213,7 +217,8 @@ def filename(self): return os.path.join( os.path.dirname(self.genomes_file.filename), self.genome.genome, - '%s_info.html' % self.genome.genome) + "%s_info.html" % self.genome.genome, + ) @property def genomes_file(self): @@ -235,6 +240,7 @@ def genome(self): def validate(self): if not self.genome: - raise ValueError("HTMLDoc object must be connected to an" - "Assembly subclass instance") + raise ValueError( + "HTMLDoc object must be connected to an" "Assembly subclass instance" + ) return True diff --git a/trackhub/base.py b/trackhub/base.py index db7ef8e..540399c 100644 --- a/trackhub/base.py +++ b/trackhub/base.py @@ -6,25 +6,28 @@ def deprecation_handler(source, filename, kwargs): - if 'local_fn' in kwargs: + if "local_fn" in kwargs: warnings.warn( - 'Please use the argument "source" instead of "local_fn"', - DeprecationWarning) + 'Please use the argument "source" instead of "local_fn"', DeprecationWarning + ) if source is not None: raise ValueError( 'Both "source" and "local_fn" are specified. Please use ' - 'just "source"') - source = kwargs.pop('local_fn') + 'just "source"' + ) + source = kwargs.pop("local_fn") - if 'remote_fn' in kwargs: + if "remote_fn" in kwargs: warnings.warn( 'Please use the argument "source" instead of "remote_fn"', - DeprecationWarning) + DeprecationWarning, + ) if filename is not None: raise ValueError( 'Both "filename" and "remote_fn" are specified. Please use ' - 'just "filename"') - filename = kwargs.pop('remote_fn') + 'just "filename"' + ) + filename = kwargs.pop("remote_fn") return source, filename @@ -33,11 +36,12 @@ class HubComponent(object): Base class for various track hub components. Several methods must be overridden by subclasses. """ + def __init__(self): self.children = [] self.parent = None - def _render(self, staging='staging'): + def _render(self, staging="staging"): """ Renders the object to file. Must be overridden by subclass. 
@@ -45,7 +49,8 @@ def _render(self, staging='staging'): """ raise NotImplementedError( "%s: subclasses must define their own _render() method" - % self.__class__.__name__) + % self.__class__.__name__ + ) def validate(self): """ @@ -54,7 +59,8 @@ def validate(self): """ raise NotImplementedError( "%s: subclasses must define their own validate() method" - % self.__class__.__name__) + % self.__class__.__name__ + ) def add_child(self, child): """ @@ -112,10 +118,8 @@ def leaves(self, cls, level=0, intermediate=False): raise StopIteration for child in self.children: - for leaf, _level in child.leaves( - cls, level + 1, intermediate=intermediate - ): - yield leaf, _level + for leaf, _level in child.leaves(cls, level + 1, intermediate=intermediate): + yield leaf, _level def render(self, staging=None): """ diff --git a/trackhub/compatibility.py b/trackhub/compatibility.py index 2afcebc..5a96fe1 100644 --- a/trackhub/compatibility.py +++ b/trackhub/compatibility.py @@ -10,17 +10,16 @@ PY2 = PY == 2 if PY3: - string_types = str, + string_types = (str,) if PY2: - string_types = basestring, + string_types = (basestring,) def py2_unicode(cls): if PY2: - if '__str__' not in cls.__dict__: - raise ValueError( - 'no __str__method defined for {}'.format(cls.__name__)) + if "__str__" not in cls.__dict__: + raise ValueError("no __str__method defined for {}".format(cls.__name__)) cls.__unicode__ = cls.__str__ - cls.__str__ = lambda self: self.__unicode__().encode('utf-8') + cls.__str__ = lambda self: self.__unicode__().encode("utf-8") return cls diff --git a/trackhub/constants.py b/trackhub/constants.py index 4ae6894..274a1d9 100644 --- a/trackhub/constants.py +++ b/trackhub/constants.py @@ -8,7 +8,7 @@ param_dict = {i.name: i for i in param_defs} # These should at least be first... 
-initial_params = ['track', 'bigDataUrl', 'shortLabel', 'longLabel', 'type'] +initial_params = ["track", "bigDataUrl", "shortLabel", "longLabel", "type"] trackhub_specific = ["source", "tracktype", "name"] @@ -24,4 +24,4 @@ if tracktype in param.types: lst.append(param.name) -INDENT = ' ' +INDENT = " " diff --git a/trackhub/genome.py b/trackhub/genome.py index a6e6b61..89d6866 100644 --- a/trackhub/genome.py +++ b/trackhub/genome.py @@ -35,7 +35,7 @@ def __init__(self, genome, trackdb=None, genome_file_obj=None, **kwargs): self._orig_kwargs = kwargs self.track_field_order = [] - self.track_field_order.extend(constants.track_fields['genome']) + self.track_field_order.extend(constants.track_fields["genome"]) self.add_params(**kwargs) @@ -62,8 +62,7 @@ def add_params(self, **kw): if k not in self.track_field_order: raise ParameterError( '"{0}" is not a valid parameter for {1} with ' - 'tracktype {2}' - .format(k, self.__class__.__name__, self.tracktype) + "tracktype {2}".format(k, self.__class__.__name__, self.tracktype) ) constants.param_dict[k].validate(v) @@ -88,12 +87,11 @@ def __str__(self): except ValidationError: return "Unconfigured object" s = [] - s.append('genome %s' % self.genome) + s.append("genome %s" % self.genome) s.append( - 'trackDb %s' + "trackDb %s" % os.path.relpath( - self.trackdb.filename, - os.path.dirname(self.genome_file_obj.filename) + self.trackdb.filename, os.path.dirname(self.genome_file_obj.filename) ) ) @@ -105,16 +103,15 @@ def __str__(self): self.kwargs = self._orig_kwargs.copy() - return '\n'.join(s) + '\n' + return "\n".join(s) + "\n" def validate(self): if len(self.children) == 0: - raise ValidationError( - "No TrackDb objects provided") + raise ValidationError("No TrackDb objects provided") if self.trackdb is None: raise ValidationError("No TrackDb objects provided") - def _render(self, staging='staging'): + def _render(self, staging="staging"): """ No file is created from a Genome object -- only from its parent GenomesFile object. 
diff --git a/trackhub/genomes_file.py b/trackhub/genomes_file.py index 0dc0381..eaf2f93 100644 --- a/trackhub/genomes_file.py +++ b/trackhub/genomes_file.py @@ -38,8 +38,8 @@ def filename(self): if self.hub is None: return None return os.path.join( - os.path.dirname(self.hub.filename), - self.hub.hub + '.genomes.txt') + os.path.dirname(self.hub.filename), self.hub.hub + ".genomes.txt" + ) @filename.setter def filename(self, fn): @@ -51,8 +51,7 @@ def hub(self): if level is None: return None if level != -1: - raise ValueError( - "Found a hub at %s levels away -- needs to be -1" % level) + raise ValueError("Found a hub at %s levels away -- needs to be -1" % level) return hub def add_genome(self, genome): @@ -63,17 +62,16 @@ def __str__(self): s = [] for genome in self.genomes: s.append(str(genome)) - return '\n'.join(s) + '\n' + return "\n".join(s) + "\n" - def _render(self, staging='staging'): + def _render(self, staging="staging"): rendered_filename = os.path.join(staging, self.filename) self.makedirs(rendered_filename) - fout = open(rendered_filename, 'w') + fout = open(rendered_filename, "w") fout.write(str(self)) fout.close() return fout.name def validate(self): if len(self.children) == 0: - raise ValueError( - "No defined Genome objects to use") + raise ValueError("No defined Genome objects to use") diff --git a/trackhub/groups.py b/trackhub/groups.py index 5b82162..9b252b4 100644 --- a/trackhub/groups.py +++ b/trackhub/groups.py @@ -8,7 +8,6 @@ class GroupDefinition(object): - def __init__(self, name, label=None, priority=1, default_is_closed=0): """ Represents a group of tracks in a trackhub. @@ -43,12 +42,12 @@ def __init__(self, name, label=None, priority=1, default_is_closed=0): def __str__(self): s = [ - 'name %s' % self.name, - 'label %s' % self.label, - 'priority %s' % self.priority, - 'defaultIsClosed %d' % self.default_is_closed + "name %s" % self.name, + "label %s" % self.label, + "priority %s" % self.priority, + "defaultIsClosed %d" % self.default_is_closed, ] - return '\n'.join(s) + '\n' + return "\n".join(s) + "\n" class GroupsFile(HubComponent): @@ -85,7 +84,7 @@ def __str__(self): """ Render groups.txt file. 
""" - return '\n'.join(g.__str__() for g in self.groups) + return "\n".join(g.__str__() for g in self.groups) @property def genomes_file(self): @@ -102,7 +101,7 @@ def genome(self): if level is None: return None if level != -1: - raise ValueError('Genome is level %s, not -1' % level) + raise ValueError("Genome is level %s, not -1" % level) return genome @property @@ -117,8 +116,11 @@ def filename(self): return None else: - return os.path.join(os.path.dirname(self.genomes_file.filename), - self.genome.genome, 'groups.txt') + return os.path.join( + os.path.dirname(self.genomes_file.filename), + self.genome.genome, + "groups.txt", + ) @filename.setter def filename(self, fn): @@ -128,16 +130,17 @@ def validate(self): if self.genome is None: raise ValidationError( "GroupsFile object must be attached to an Genome instance " - "or subclass") + "or subclass" + ) pass - def _render(self, staging='staging'): + def _render(self, staging="staging"): """ Renders the children GroupDefinition objects to file """ rendered_filename = os.path.join(staging, self.filename) self.makedirs(rendered_filename) - fout = open(rendered_filename, 'w') + fout = open(rendered_filename, "w") fout.write(str(self)) fout.close() return fout.name diff --git a/trackhub/helpers.py b/trackhub/helpers.py index 7c52005..888a835 100644 --- a/trackhub/helpers.py +++ b/trackhub/helpers.py @@ -18,9 +18,8 @@ def dimensions_from_subgroups(s): ---------- s : list of SubGroup objects (or anything with a `name` attribute) """ - letters = 'XYABCDEFGHIJKLMNOPQRSTUVWZ' - return ' '.join(['dim{0}={1}'.format(dim, sg.name) - for dim, sg in zip(letters, s)]) + letters = "XYABCDEFGHIJKLMNOPQRSTUVWZ" + return " ".join(["dim{0}={1}".format(dim, sg.name) for dim, sg in zip(letters, s)]) def filter_composite_from_subgroups(s): @@ -40,10 +39,10 @@ def filter_composite_from_subgroups(s): used, just the number of items. """ dims = [] - for letter, sg in zip('ABCDEFGHIJKLMNOPQRSTUVWZ', s[2:]): - dims.append('dim{0}'.format(letter)) + for letter, sg in zip("ABCDEFGHIJKLMNOPQRSTUVWZ", s[2:]): + dims.append("dim{0}".format(letter)) if dims: - return ' '.join(dims) + return " ".join(dims) def hex2rgb(h): @@ -58,13 +57,18 @@ def hex2rgb(h): >>> hex2rgb("#ff0033") '255,0,51' """ - if not h.startswith('#') or len(h) != 7: + if not h.startswith("#") or len(h) != 7: raise ValueError("Does not look like a hex color: '{0}'".format(h)) - return ','.join(map(str, ( - int(h[1:3], 16), - int(h[3:5], 16), - int(h[5:7], 16), - ))) + return ",".join( + map( + str, + ( + int(h[1:3], 16), + int(h[3:5], 16), + int(h[5:7], 16), + ), + ) + ) def sanitize(s, strict=True): @@ -82,20 +86,20 @@ def sanitize(s, strict=True): If True, only alphanumeric characters are allowed. If False, a limited set of additional characters (-._) will be allowed. """ - allowed = ''.join( + allowed = "".join( [ - 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', - 'abcdefghijklmnopqrstuvwxyz', - '0123456789', + "ABCDEFGHIJKLMNOPQRSTUVWXYZ", + "abcdefghijklmnopqrstuvwxyz", + "0123456789", ] ) if not strict: - allowed += '-_.' + allowed += "-_." 
- s = str(s).replace(' ', '_') + s = str(s).replace(" ", "_") - return ''.join([i for i in s if i in allowed]) + return "".join([i for i in s if i in allowed]) def auto_track_url(track): @@ -112,7 +116,8 @@ def auto_track_url(track): if hub is None: raise ValueError( - "track is not fully connected because the root is %s" % repr(hub)) + "track is not fully connected because the root is %s" % repr(hub) + ) if hub.url is None: raise ValueError("hub.url is not set") if track.source is None: @@ -139,11 +144,13 @@ def print_rendered_results(results_dict): Rendered results can be multiply-nested dictionaries; this uses JSON serialization to print a nice representation. """ + class _HubComponentEncoder(json.JSONEncoder): def default(self, o): if isinstance(o, base.HubComponent): return repr(o) return json.JSONEncoder.default(self, o) + formatted = json.dumps(results_dict, indent=4, cls=_HubComponentEncoder) # the returned string contains lines with trailing spaces, which causes # doctests to fail. So fix that here. @@ -156,7 +163,7 @@ def data_dir(): Returns the data directory that contains example files for tests and documentation. """ - return os.path.join(os.path.dirname(_here), 'test', 'data') + return os.path.join(os.path.dirname(_here), "test", "data") def example_bigbeds(): @@ -167,7 +174,7 @@ def example_bigbeds(): d = data_dir() for fn in os.listdir(d): fn = os.path.join(d, fn) - if os.path.splitext(fn)[-1] == '.bigBed': + if os.path.splitext(fn)[-1] == ".bigBed": hits.append(os.path.abspath(fn)) return hits @@ -180,6 +187,6 @@ def example_bigwigs(): d = data_dir() for fn in os.listdir(d): fn = os.path.join(d, fn) - if os.path.splitext(fn)[-1] == '.bw': + if os.path.splitext(fn)[-1] == ".bw": hits.append(os.path.abspath(fn)) return hits diff --git a/trackhub/hub.py b/trackhub/hub.py index 2220b1b..46b072b 100644 --- a/trackhub/hub.py +++ b/trackhub/hub.py @@ -10,9 +10,17 @@ class Hub(HubComponent): # map proper track hub stanza field names to pythonic attribute names in # this class. - def __init__(self, hub, short_label=None, long_label=None, - genomes_file=None, genomes_filename=None, email="", - url=None, filename=None): + def __init__( + self, + hub, + short_label=None, + long_label=None, + genomes_file=None, + genomes_filename=None, + email="", + url=None, + filename=None, + ): """ Represents a top-level track hub container. 
@@ -44,8 +52,7 @@ def __init__(self, hub, short_label=None, long_label=None, HubComponent.__init__(self) if url is not None: self.url = url - warnings.DeprecationWarning( - 'url is no longer used for Hub objects') + warnings.DeprecationWarning("url is no longer used for Hub objects") self.hub = hub if not short_label: short_label = hub @@ -60,13 +67,14 @@ def __init__(self, hub, short_label=None, long_label=None, self.add_genomes_file(genomes_file) if filename is None: - filename = hub + '.hub.txt' + filename = hub + ".hub.txt" self.filename = filename def validate(self): if self.genomes_file is None: raise ValidationError( - 'No GenomesFile attached to Hub (use add_genomes_file())') + "No GenomesFile attached to Hub (use add_genomes_file())" + ) self.genomes_file.validate() return True @@ -83,23 +91,23 @@ def __str__(self): if self.genomes_file: genomes_file = self.genomes_file.filename for label, value in [ - ('hub', 'hub'), - ('shortLabel', self.short_label), - ('longLabel', self.long_label), - ('genomesFile', genomes_file), - ('email', self.email), + ("hub", "hub"), + ("shortLabel", self.short_label), + ("longLabel", self.long_label), + ("genomesFile", genomes_file), + ("email", self.email), ]: - s.append('{0} {1}'.format(label, value)) - return '\n'.join(s) + s.append("{0} {1}".format(label, value)) + return "\n".join(s) - def _render(self, staging='staging'): + def _render(self, staging="staging"): """ Render just this object, and not all the underlying GenomeFiles and their TrackDb. """ rendered_filename = os.path.join(staging, self.filename) self.makedirs(rendered_filename) - fout = open(rendered_filename, 'w') + fout = open(rendered_filename, "w") fout.write(str(self)) fout.close() return fout.name diff --git a/trackhub/parse.py b/trackhub/parse.py index 4accdde..1422820 100644 --- a/trackhub/parse.py +++ b/trackhub/parse.py @@ -12,8 +12,9 @@ def make_soup(): # Initial workarounds chopped up the file and parsed individually, but the # most lenient html5lib parse seems to do the trick. response = requests.get( - 'https://genome.ucsc.edu/goldenPath/help/trackDb/trackDbHub.html') - soup = bs4.BeautifulSoup(response.text, 'html5lib') + "https://genome.ucsc.edu/goldenPath/help/trackDb/trackDbHub.html" + ) + soup = bs4.BeautifulSoup(response.text, "html5lib") return soup @@ -38,44 +39,44 @@ def get_supported_types(soup):
  • (don't care about this one since not in a ul) """ - abam = soup.find('a', href=re.compile('#bam')) + abam = soup.find("a", href=re.compile("#bam")) ol = abam.parent.parent.parent.parent supported_types = [] - for ul in ol.find_all('ul'): - for a in ul.find_all('a'): - ref = a['href'].replace('#', '') + for ul in ol.find_all("ul"): + for a in ul.find_all("a"): + ref = a["href"].replace("#", "") # some have text like "#bigMaf_-_Multiple_Alignments", so split on # the '_' - ref = ref.split('_')[0] + ref = ref.split("_")[0] supported_types.append(ref) # multiWig is not included in this table, but it's considered a type in the # divs. - supported_types.append('multiWig') + supported_types.append("multiWig") # If a setting pertains to all types, they are not all listed, rather, the # "all" type is specified. - supported_types.append('all') + supported_types.append("all") return supported_types def support_level(soup): # Another set of tables stores the support level - settings_tables = soup.find_all('table', class_='settingsTable') + settings_tables = soup.find_all("table", class_="settingsTable") support_levels = {} for t in settings_tables: - for td in t.find_all('td'): - if not td.has_attr('class'): + for td in t.find_all("td"): + if not td.has_attr("class"): continue - key = td['class'] + key = td["class"] assert len(key) == 1 key = key[0] - code = td.find('code') - if not code or not code.has_attr('class'): + code = td.find("code") + if not code or not code.has_attr("class"): continue - support = code['class'] + support = code["class"] support_levels[key] = support return support_levels @@ -141,16 +142,16 @@ def keep(tag): they're from. However some are examples that luckily are tagged as such (e.g., bed_example). """ - if tag.name != 'span': + if tag.name != "span": return - if tag.parent.has_attr('class'): - for c in tag.parent['class']: - if 'example' in c: + if tag.parent.has_attr("class"): + for c in tag.parent["class"]: + if "example" in c: return - if tag.has_attr('class'): - if 'types' in tag['class']: - if 'customTracks' not in tag['class']: + if tag.has_attr("class"): + if "types" in tag["class"]: + if "customTracks" not in tag["class"]: return True d = soup.find_all(keep) @@ -161,7 +162,13 @@ def keep(tag): for i in d: div = i.parent - _id = div.attrs['class'] + + # 2024-04-13: maxWindowCoverage appears to be wrapped in an + # additional <code>, so we need to get *its* parent. + if div.name == "code": # not 'div'... + div = div.parent + + _id = div.attrs["class"] assert len(_id) == 1 _id = _id[0] @@ -170,26 +177,26 @@ def keep(tag): # though. debug[_id] = div - types = set(i.attrs['class']).intersection(supported_types) + types = set(i.attrs["class"]).intersection(supported_types) if len(types.intersection(supported_types)) == 0: continue types = list(types) - fmt = div.find_all(name='div', attrs='format') + fmt = div.find_all(name="div", attrs="format") assert len(fmt) == 1 fmt = fmt[0] - required_p = div.find_all(name='p', attrs='isRequired') + required_p = div.find_all(name="p", attrs="isRequired") required = False if required_p: for i in required_p: - if 'yes' in i.text.lower() or 'for hubs' in i.text.lower(): + if "yes" in i.text.lower() or "for hubs" in i.text.lower(): required = True # Some, like bamGrayMode, have several "sub names" like bamGrayMode, # aliQualRnage, baseQualRange. Handle those here.
- formats = fmt.find_all('code') + formats = fmt.find_all("code") if formats is None: continue else: @@ -198,37 +205,36 @@ def keep(tag): # Most non-hub-relevant settings are filtered out py the supported # types filter, but some sneak through (e.g. several only used by # ENCODE). - no_hub = div.find_all('p', string=re.compile('NOT FOR HUBS')) + no_hub = div.find_all("p", string=re.compile("NOT FOR HUBS")) if no_hub: continue - example = div.find('pre') + example = div.find("pre") if example is not None: example = str(example.string) - desc = div.find_all('p') + desc = div.find_all("p") if desc is not None: - desc = ' '.join([' '.join(''.join(i.strings).split()) for i in - desc]) + desc = " ".join([" ".join("".join(i.strings).split()) for i in desc]) if _id in specs: raise ValueError("duplicate value for {}".format(_id)) # We are only concerned with settings, not types. Types (bigBed, bam, # etc) have "type" in their format. - if any([i.split()[0] == 'type' for i in formats]): + if any([i.split()[0] == "type" for i in formats]): continue # Special cases # - if _id in ['view', 'subGroupN', 'parent_view']: + if _id in ["view", "subGroupN", "parent_view"]: continue spec = { - 'format': formats, - 'types': sorted(types), - 'required': required, - 'example': example, - 'desc': desc, + "format": formats, + "types": sorted(types), + "required": required, + "example": example, + "desc": desc, } specs[_id] = spec @@ -254,38 +260,50 @@ def print_parsed(specs): specs : dict Returned dictionary from parse_divs() """ + observed_types = set() for i in specs.values(): - observed_types.update(i['types']) + observed_types.update(i["types"]) observed_types = sorted(observed_types) - s = ['# Observed types from the parsed document'] - s.append('TRACKTYPES = [') + s = ["# Observed types from the parsed document"] + s.append("TRACKTYPES = [") for i in observed_types: - s.append(" '{}',".format(i)) - s.append(']') - print('\n'.join(s) + '\n') + s.append(' "{}",'.format(i)) + s.append("]") + print("\n".join(s) + "\n") - data_types = specs['bigDataUrl']['types'] + data_types = specs["bigDataUrl"]["types"] - s = ['# Tracks for which the definition specifies bigDataUrl'] - s.append('DATA_TRACKTYPES = [') + s = ["# Tracks for which the definition specifies bigDataUrl"] + s.append("DATA_TRACKTYPES = [") for i in data_types: - s.append(" '{}',".format(i)) - s.append(']') - print('\n'.join(s) + '\n') - print('param_defs = [') + s.append(' "{}",'.format(i)) + s.append("]") + print("\n".join(s) + "\n") + print("param_defs = [") print() + + def _quote(x): + """ + Python __repr__ uses single quotes, but we're aiming for a pep8 + format with double quotes. Fix that here. 
+ """ + s = str(x) + if '"' in s: + return s + return s.replace("'", '"') + for k, v in sorted(specs.items()): print( ( - ''' + f""" Param( name="{k}", - fmt={v[format]}, - types={v[types]}, - required={v[required]}, - validator=str),'''.format(**locals()) + fmt={_quote(v['format'])}, + types={_quote(v['types'])}, + required={_quote(v['required'])}, + validator=str),""" ) ) diff --git a/trackhub/parsed_params.py b/trackhub/parsed_params.py index 3b38300..4b12cc1 100644 --- a/trackhub/parsed_params.py +++ b/trackhub/parsed_params.py @@ -14,827 +14,1015 @@ # Observed types from the parsed document TRACKTYPES = [ - 'all', - 'bam', - 'bigBarChart', - 'bigBed', - 'bigChain', - 'bigGenePred', - 'bigInteract', - 'bigLolly', - 'bigMaf', - 'bigNarrowPeak', - 'bigPsl', - 'bigWig', - 'compositeTrack', - 'halSnake', - 'hic', - 'multiWig', - 'subGroups', - 'superTrack', - 'vcfPhasedTrio', - 'vcfTabix', - 'view', - + "all", + "bam", + "bigBarChart", + "bigBed", + "bigChain", + "bigGenePred", + "bigInteract", + "bigLolly", + "bigMaf", + "bigNarrowPeak", + "bigPsl", + "bigWig", + "compositeTrack", + "halSnake", + "hic", + "multiWig", + "subGroups", + "superTrack", + "vcfPhasedTrio", + "vcfTabix", + "view", # assembly tracks are not defined in the document; we need to add # separately. - 'assembly', - + "assembly", # neither are genome objects, but we want to support arguments like # defaultPos so it needs to be added here. - 'genome', + "genome", ] # Tracks for which the definition specifies bigDataUrl DATA_TRACKTYPES = [ - 'bam', - 'bigBarChart', - 'bigBed', - 'bigChain', - 'bigInteract', - 'bigLolly', - 'bigMaf', - 'bigPsl', - 'bigWig', - 'hic', - 'vcfPhasedTrio', - 'vcfTabix', + "bam", + "bigBarChart", + "bigBed", + "bigChain", + "bigInteract", + "bigLolly", + "bigMaf", + "bigPsl", + "bigWig", + "hic", + "vcfPhasedTrio", + "vcfTabix", ] param_defs = [ - Param( name="aggregate", - fmt=['aggregate '], - types=['multiWig'], + fmt=["aggregate "], + types=["multiWig"], required=False, - validator=set(['transparentOverlay', 'stacked', 'solidOverlay', 'none'])), - + validator=set(["transparentOverlay", "stacked", "solidOverlay", "none"]), + ), Param( name="aliQualRange", - fmt=['bamGrayMode ', 'aliQualRange ', 'baseQualRange '], - types=['bam'], - required=False, - validator=validate.ColSV2), - + fmt=[ + "bamGrayMode ", + "aliQualRange ", + "baseQualRange ", + ], + types=["bam"], + required=False, + validator=validate.ColSV2, + ), Param( name="allButtonPair", - fmt=['allButtonPair on'], - types=['compositeTrack'], + fmt=["allButtonPair on"], + types=["compositeTrack"], required=False, - validator=set(['on'])), - + validator=set(["on"]), + ), Param( name="altColor", - fmt=['altColor '], - types=['all'], + fmt=["altColor "], + types=["all"], required=False, - validator=validate.RGB), - + validator=validate.RGB, + ), Param( name="alwaysZero", - fmt=['alwaysZero '], - types=['bigWig'], + fmt=["alwaysZero "], + types=["bigWig"], required=False, - validator=set(['on', 'off'])), - + validator=set(["on", "off"]), + ), Param( name="autoScale", - fmt=['autoScale '], - types=['bigWig', 'hic', 'compositeTrack'], + fmt=["autoScale "], + types=["bigWig", "hic", "compositeTrack"], required=False, - validator=set(['on', 'off', 'group'])), - + validator=set(["on", "off", "group"]), + ), Param( name="bamColorMode", - fmt=['bamColorMode '], - types=['bam'], + fmt=["bamColorMode "], + types=["bam"], required=False, - validator=set(['strand', 'gray', 'tag', 'off'])), - + validator=set(["strand", "gray", "tag", "off"]), + ), 
Param( name="bamColorTag", - fmt=['bamColorTag '], - types=['bam'], + fmt=["bamColorTag "], + types=["bam"], required=False, - validator=str), - + validator=str, + ), Param( name="bamGrayMode", - fmt=['bamGrayMode ', 'aliQualRange ', 'baseQualRange '], - types=['bam'], - required=False, - validator=set(['aliQual', 'baseQual', 'unpaired'])), - + fmt=[ + "bamGrayMode ", + "aliQualRange ", + "baseQualRange ", + ], + types=["bam"], + required=False, + validator=set(["aliQual", "baseQual", "unpaired"]), + ), Param( name="bamSkipPrintQualScore", - fmt=['bamSkipPrintQualScore .'], - types=['bam'], + fmt=["bamSkipPrintQualScore ."], + types=["bam"], required=False, - validator=set(['.'])), - + validator=str, + ), + Param( + name="barChartBarMinPadding", + fmt=["barChartBarMinPadding "], + types=["bigBarChart"], + required=False, + validator=str, + ), + Param( + name="barChartBarMinWidth", + fmt=["barChartBarMinWidth "], + types=["bigBarChart"], + required=False, + validator=str, + ), Param( name="barChartBars", - fmt=['barChartBars '], - types=['bigBarChart'], + fmt=["barChartBars "], + types=["bigBarChart"], required=False, - validator=str), - + validator=str, + ), Param( name="barChartCategoryUrl", - fmt=['barChartCategoryUrl '], - types=['bigBarChart'], + fmt=["barChartCategoryUrl "], + types=["bigBarChart"], required=False, - validator=str), - + validator=str, + ), Param( name="barChartColors", - fmt=['barChartColors '], - types=['bigBarChart'], + fmt=["barChartColors "], + types=["bigBarChart"], required=False, - validator=str), - + validator=str, + ), + Param( + name="barChartFacets", + fmt=["barChartFacets "], + types=["bigBarChart"], + required=False, + validator=str, + ), Param( name="barChartLabel", - fmt=['barChartLabel