Skip to content

Commit

Permalink
overhaul project
Browse files Browse the repository at this point in the history
  • Loading branch information
dmyersturnbull committed Jan 22, 2024
1 parent fa1f1f3 commit 44df540
Show file tree
Hide file tree
Showing 310 changed files with 6,319 additions and 4,851 deletions.
46 changes: 46 additions & 0 deletions .github/ISSUE_TEMPLATE/bug.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# SPDX-FileCopyrightText: Copyright 2020-2023, Contributors to CICD
# SPDX-PackageHomePage: https://github.com/dmyersturnbull/cicd
# SPDX-License-Identifier: Apache-2.0

name: "🐛 Bug"
description: "Bug report"
labels: ["type: fix"]

body:
- type: textarea
id: description
attributes:
label: What happened?
placeholder: Detail what you did and what happened.
validations:
required: true
- type: dropdown
id: os
attributes:
label: What OS types are you seeing the problem on?
multiple: true
options:
- Windows
- Linux
- macOS
validations:
required: false
- type: dropdown
id: browsers
attributes:
label: What browsers are you seeing the problem on?
multiple: true
options:
- Chrome
- Firefox
- Edge
- Safari
- Opera
- Samsung Internet
validations:
required: false
- type: textarea
id: logs
attributes:
label: Relevant log output
description: Please copy and paste any relevant log output.
16 changes: 16 additions & 0 deletions .github/ISSUE_TEMPLATE/docs.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# SPDX-FileCopyrightText: Copyright 2020-2023, Contributors to CICD
# SPDX-PackageHomePage: https://github.com/dmyersturnbull/cicd
# SPDX-License-Identifier: Apache-2.0

name: "📚 Documentation"
description: "Documentation issue"
labels: ["type: docs"]

body:
- type: textarea
id: description
attributes:
label: What needs work?
placeholder: Describe the documentation issue.
validations:
required: true
16 changes: 16 additions & 0 deletions .github/ISSUE_TEMPLATE/feature.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# SPDX-FileCopyrightText: Copyright 2020-2023, Contributors to CICD
# SPDX-PackageHomePage: https://github.com/dmyersturnbull/cicd
# SPDX-License-Identifier: Apache-2.0

name: "✨ Feature"
description: "Feature request"
labels: ["type: feature"]

body:
- type: textarea
id: description
attributes:
label: What is the feature request?
placeholder: Describe the feature you would like.
validations:
required: true
51 changes: 51 additions & 0 deletions .github/ISSUE_TEMPLATE/performance.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# SPDX-FileCopyrightText: Copyright 2020-2023, Contributors to CICD
# SPDX-PackageHomePage: https://github.com/dmyersturnbull/cicd
# SPDX-License-Identifier: Apache-2.0

name: "🐢 Performance"
description: "Performance issue"
labels: ["type: performance"]

body:
- type: textarea
id: description
attributes:
label: What is the performance issue?
placeholder: Detail your steps and where the performance dropped.
validations:
required: true
- type: input
id: version
attributes:
label: Version
description: What version are you running?
- type: dropdown
id: os
attributes:
label: What OS types are you seeing the problem on?
multiple: true
options:
- Windows
- Linux
- macOS
validations:
required: false
- type: dropdown
id: browsers
attributes:
label: What browsers are you seeing the problem on?
multiple: true
options:
- Chrome
- Firefox
- Edge
- Safari
- Opera
- Samsung Internet
validations:
required: false
- type: textarea
id: logs
attributes:
label: Relevant log output
description: Please copy and paste any relevant log output.
1 change: 1 addition & 0 deletions .github/PULL_REQUEST_TEMPLATE/pull_request.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Pull request
43 changes: 43 additions & 0 deletions .github/workflows/build.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# This workflow will build a Java project with Gradle
# For more information see: https://help.github.com/actions/language-and-framework-guides/building-and-testing-java-with-gradle

name: Test

on:
workflow_call:
inputs:
ref:
type: string
description: Ref to checkout
default: main

permissions:
contents: read
statuses: read
actions: read
security-events: write

jobs:
build:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v3
- name: Set up JDK 21
uses: actions/setup-java@v2
with:
distribution: oracle
java-version: "21-ea"
- uses: gradle/wrapper-validation-action@v1
- name: Set up and execute Gradle test
uses: gradle/gradle-build-action@v2
with:
arguments: test
- uses: actions/upload-artifact@v3
with:
name: package
path: build/libs
- uses: actions/upload-artifact@v3
with:
name: build-reports
path: build/reports/
30 changes: 30 additions & 0 deletions .github/workflows/checks.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Various static code analysis and checks
# Runs on commit or pull (to any branch)
# and on a schedule (weekly)
name: Static checks
on:
push:
pull_request:
schedule:
- cron: 0 7 * * 6
jobs:
markdown-link-check:
name: Check Markdown links
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@master
- uses: gaurav-nelson/github-action-markdown-link-check@v1
analyse:
name: Analyze with CodeQL
runs-on: ubuntu-latest
steps:
- name: Checkout repository
uses: actions/checkout@v2
with:
fetch-depth: 2
- name: Initialize CodeQL
uses: github/codeql-action/init@v1
- name: Autobuild with CodeQL
uses: github/codeql-action/autobuild@v1
- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v1
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ Gemfile.lock
# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
hs_err_pid*

# IDE files
.idea

# Keep these config files
!/.gitignore
!/.travis.yml
Expand Down
42 changes: 25 additions & 17 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,26 +3,34 @@
![stability-stable](https://img.shields.io/badge/stability-stable-green.svg)
![Active](https://img.shields.io/static/v1?label=development&message=active&color=green)
[![License](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
![Latest release](https://img.shields.io/github/v/tag/dmyersturnbull/genomics-io)
![Latest release](https://img.shields.io/github/v/tag/dmyersturnbull/bioio)
![Java compatibility](https://img.shields.io/static/v1?label=Java&message=14%2b)
![Maven Central](https://img.shields.io/maven-central/v/dmyersturnbull/genomics-io)
![GitHub last commit](https://img.shields.io/github/last-commit/dmyersturnbull/genomics-io?color=green)
![Maven Central](https://img.shields.io/maven-central/v/dmyersturnbull/bioio)
![GitHub last commit](https://img.shields.io/github/last-commit/dmyersturnbull/bioio?color=green)

Efficient, high-quality streaming parsers and writers for 12 text-based formats used in bioinformatics.

The goal is to have the best possible parsers for the most problematic ancient formats.

**Supported formats:**
VCF, FASTA, GenBank, BED, GFF/GTV/GVF, UCSC chain,
pre-MAKEPED, BGEE, Turtle/RDF,
matrices/tables/CSV/TSV

- Variant calls: VCF
- Gene features: GenBank, BED, GFF3, GTF, GVF
- Sequences: FASTA, FASTA alignment, FASTQ
- Expression: BGEE
- Coordinate mapping: UCSC chain
- Phylogenetics/pedigree: pre-MAKEPED
- Protein structure: PDB
- Triples: Turtle, RDF
- Tabular: CSV, TSV, etc.

**Features & choices:**

- Reads and writes Java 8+ Streams, keeping only essential metadata in memory.
- Parses every part of a format, leaving nothing as text unnecessarily.
- Has a consistent API. Coordinates are always 0-indexed and text is always escaped (according to specifications).
- Immutable, thread-safe, null-pointer-safe (`Optional<>`), and arbitrary-precision.
- All methods are either exposed through interfaces or reside in records, enums, and final classes.

#### Example:

Expand All @@ -33,7 +41,7 @@ import org.pharmgkb.parsers.vcf;

Stream<VcfPosition> goodMitochondrialCalls = new VcfDataParser().parseFile(path)
.filter(p -> p.chromosome.isMitochondial())
.filter(VcfFilters.qualityAtLeast(10)) // converts to BigDecimal
.filter(VcfFilters.qualityAtLeast(10)); // converts to BigDecimal

new VcfDataWriter().writeToFile(goodMitochondrialCalls, filteredPath);
```
Expand Down Expand Up @@ -67,7 +75,7 @@ implementation group: 'com.pharmgkb.bioio', name: 'bioio', version: '0.3.0'

#### Pre-built JAR

[Releases](https://github.com/dmyersturnbull/genomics-io/releases) contain both _fat_ JARs (containing dependencies)
[Releases](https://github.com/dmyersturnbull/bioio/releases) contain both _fat_ JARs (containing dependencies)
and _thin_ JARs (without dependencies), independently for each subproject
(e.g. `bioio-vcf` for VCF, or `bioio-gff` for GFF/GTF/GVF).

Expand All @@ -84,7 +92,7 @@ Formats listed in bold are currently implemented.

- Variant calls: **VCF**
- Gene features: **GenBank, BED, GFF3, GTF, GVF**
- Sequences: **FASTA**, EMBL, FASTA alignment, **FASTQ**, Seq, faidx (FASTQ indices)
- Sequences: **FASTA**, EMBL, **FASTA alignment**, **FASTQ**, Seq, faidx (FASTQ indices)
- Expression: **BGEE**
- Coordinate mapping: **UCSC chain**
- Phylogenetics & pedigrees: **pre-MAKEPED**, LINKAGE, Nexus
Expand All @@ -93,7 +101,7 @@ Formats listed in bold are currently implemented.
- Protein structure: PDB (non-comprehensive)
- RNA structure: Bpseq, Connect/CT, Vienna, Base-Pairing, Dot-Bracket, Dot-Plot
- Other: cytoband
- Misc: Matrices/tables/CSV/TSV, alignment, **Turtle (and RDF)**
- Misc: **Matrices/tables/CSV/TSV**, **Turtle (and RDF)**

### Extra things

Expand Down Expand Up @@ -220,17 +228,17 @@ List<AlignmentResult> topScores = parser.parseAll(Files.lines(fastaFile))
```java
// Stream Triples in Turtle format from a URL
/*
@prefix myPrefix: <http://abc#owner> .
<http://abc#cat> "belongsTo" @myPrefix ;
"hasSynonym" <http://abc#feline> .
@prefix myPrefix: <https://abc#owner> .
<https://abc#cat> "belongsTo" @myPrefix ;
"hasSynonym" <https://abc#feline> .
*/
Stream<String> input = null;
try (BufferedReader reader = new BufferedReader(new InputStreamReader(((HttpURLConnection) myUrl.openConnection()).getInputStream()))) {
input = reader.lines();
}
TripleParser parser = new TripleParser(true); // usePrefixes=true will replace prefixes
Stream<Triple> stream = input.map(new TripleParser());
// contains: List[ http://abc#cat belongsTo http://abc#owner , http://abc#cat hasSynonym http://abc#feline ]
// contains: List[ https://abc#cat belongsTo https://abc#owner , https://abc#cat hasSynonym https://abc#feline ]
List<Prefix> prefixes = parser.getPrefixes();
```

Expand All @@ -257,18 +265,18 @@ Map<String, Long> genotypeCounts = new VcfDataParser().parseAll(input)
```

```java
Stream<BigDecimal> MatrixParser.tabs().parseAll(file).map(BigDecimal::new);
Stream<BigDecimal> org.pharmgkb.parsers.text.MatrixParserI.tabs().parseAll(file).map(BigDecimal::new);
```

### Guiding principles

1. Where possible, a parser is a `Function<String, R>` or `Function<Stream<String>, R>`,
and writer is a `Function<R, String>` or `Function<R, Stream<String>>`.
[Java 8+ Streams](http://www.oracle.com/technetwork/articles/java/ma14-java-se-8-streams-2177646.html)
[Java 8+ Streams](https://www.oracle.com/technetwork/articles/java/ma14-java-se-8-streams-2177646.html)
are expected to be used.
2. Null values are generally banned from public methods in favor of
[`Optional`](https://download.java.net/java/early_access/jdk16/docs/api/java.base/java/util/Optional.html).
See http://www.oracle.com/technetwork/articles/java/java8-optional-2175753.html for more information.
See https://www.oracle.com/technetwork/articles/java/java8-optional-2175753.html for more information.
3. Most operations are thread-safe. Thread safety is annotated using `javax.annotation.concurrent`.
4. Top-level data classes are immutable, as annotated by `javax.annotation.concurrent.Immutable`.
5. The builder pattern is used for non-trivial classes. Each builder has a copy constructor.
Expand Down
4 changes: 2 additions & 2 deletions bed/build.gradle
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
dependencies {
compile project(':core')
}
api project(':core')
}
2 changes: 0 additions & 2 deletions bed/lombok.config

This file was deleted.

9 changes: 4 additions & 5 deletions bed/src/main/java/org/pharmgkb/parsers/bed/BedParser.java
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package org.pharmgkb.parsers.bed;

import org.pharmgkb.parsers.BadDataFormatException;
import org.pharmgkb.parsers.LineParser;
import org.pharmgkb.parsers.bed.model.BedFeature;
import org.pharmgkb.parsers.model.Strand;
import org.slf4j.Logger;
Expand Down Expand Up @@ -42,18 +41,18 @@
*
* @author Douglas Myers-Turnbull
* @see BedFeature
* @see org.pharmgkb.parsers.bed.BedWriter
* @see BedWriter
*/
@ThreadSafe
public class BedParser implements LineParser<BedFeature> {
public class BedParser implements BedParserI {

private static final long sf_logEvery = 10000;
private static final Pattern sf_comma = Pattern.compile(",");
private static final Pattern sf_tab = Pattern.compile("\t");

private static final Logger sf_logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

private AtomicLong m_lineNumber = new AtomicLong(0L);
private final AtomicLong m_lineNumber = new AtomicLong(0L);

@Nonnull
@Override
Expand Down Expand Up @@ -81,7 +80,7 @@ public BedFeature apply(@Nonnull String line) throws BadDataFormatException {
builder.setScore(Integer.parseInt(parts[4]));
}
if (parts.length > 5) {
builder.setStrand(Strand.lookupBySymbol(parts[5]));
builder.setStrand(Strand.fromSymbol(parts[5]));
}
if (parts.length > 6) {
builder.setThickStart(Long.parseLong(parts[6]));
Expand Down
8 changes: 8 additions & 0 deletions bed/src/main/java/org/pharmgkb/parsers/bed/BedParserI.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
package org.pharmgkb.parsers.bed;

import org.pharmgkb.parsers.LineParser;
import org.pharmgkb.parsers.bed.model.BedFeature;

/**
 * Interface for BED line parsers: a {@link LineParser} specialized to produce
 * {@link BedFeature} instances.
 *
 * <p>This interface declares no members of its own; everything it offers is
 * inherited from {@code LineParser<BedFeature>}. {@link BedParser} implements it.
 *
 * @see BedParser
 * @see BedFeature
 */
public interface BedParserI extends LineParser<BedFeature> {

}
Loading

0 comments on commit 44df540

Please sign in to comment.