Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reverse complement #4

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 18 additions & 6 deletions docs/docco.css
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,11 @@
}

@font-face {
font-family: 'novecento-bold';
src: url('public/fonts/novecento-bold.eot');
src: url('public/fonts/novecento-bold.eot?#iefix') format('embedded-opentype'),
url('public/fonts/novecento-bold.woff') format('woff'),
url('public/fonts/novecento-bold.ttf') format('truetype');
font-family: 'roboto-black';
src: url('public/fonts/roboto-black.eot');
src: url('public/fonts/roboto-black.eot?#iefix') format('embedded-opentype'),
url('public/fonts/roboto-black.woff') format('woff'),
url('public/fonts/roboto-black.ttf') format('truetype');
font-weight: normal;
font-style: normal;
}
Expand Down Expand Up @@ -67,14 +67,17 @@ h1, h2, h3, h4, h5, h6 {
color: #112233;
line-height: 1em;
font-weight: normal;
font-family: "novecento-bold";
font-family: "roboto-black";
text-transform: uppercase;
margin: 30px 0 15px 0;
}

h1 {
margin-top: 40px;
}
h2 {
font-size: 1.26em;
}

hr {
border: 0;
Expand Down Expand Up @@ -180,9 +183,18 @@ ul.sections > li > div {
display: block;
}

/* Pin the wrapper to the right edge of the viewport, stretched from top to
   bottom, so it stays in place while the page scrolls. */
#jump_page_wrapper{
position: fixed;
right: 0;
top: 0;
bottom: 0;
}

/* Cap the list at the height of its container and let it scroll when the
   entries do not fit. */
#jump_page {
padding: 5px 0 3px;
margin: 0 0 25px 25px;
max-height: 100%;
overflow: auto;
}

#jump_page .source {
Expand Down
129 changes: 105 additions & 24 deletions docs/fasta-parser.html
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ <h1 id="fasta-parser">fasta-parser</h1>
<p>Buffer Stream parser from FASTA to JSON.</p>
<p>doi: <a href="?">?</a>
author: <a href="http://bmpvieira.com">Bruno Vieira</a>
email: <a href="&#x6d;&#x61;&#105;&#108;&#116;&#x6f;&#58;&#x6d;&#97;&#105;&#x6c;&#64;&#98;&#x6d;&#112;&#118;&#105;&#x65;&#x69;&#x72;&#x61;&#46;&#x63;&#x6f;&#109;">&#x6d;&#97;&#105;&#x6c;&#64;&#98;&#x6d;&#112;&#118;&#105;&#x65;&#x69;&#x72;&#x61;&#46;&#x63;&#x6f;&#109;</a>
email: <a href="mailto:&#x6d;&#97;&#x69;&#108;&#64;&#98;&#109;&#x70;&#118;&#x69;&#101;&#x69;&#114;&#97;&#x2e;&#x63;&#x6f;&#x6d;">&#x6d;&#97;&#x69;&#108;&#64;&#98;&#109;&#x70;&#118;&#x69;&#101;&#x69;&#114;&#97;&#x2e;&#x63;&#x6f;&#x6d;</a>
license: <a href="https://raw.githubusercontent.com/bionode/fasta-parser/master/LICENSE">MIT</a></p>
</blockquote>
<hr>
Expand All @@ -47,22 +47,28 @@ <h2 id="usage">Usage</h2>

<span class="hljs-keyword">var</span> parser = <span class="hljs-built_in">require</span>(<span class="hljs-string">'fasta-parser'</span>)

<span class="hljs-keyword">var</span> fastaData = <span class="hljs-keyword">new</span> Buffer (<span class="hljs-string">'&gt;sequence1\n\
<span class="hljs-keyword">var</span> fastaData = Buffer.from (<span class="hljs-string">'&gt;sequence1\n\
ATGCACGTCACGTCAGTACTCGTCAGTAC\n\
&gt;sequence2\n\
CAGTCCTACTGCATGCATGCATGCATGCATCGATGCATGTCGACTGCATGCATGC\n'</span>)

<span class="hljs-keyword">var</span> parser = fasta()
parser.on(<span class="hljs-string">'data'</span>, <span class="hljs-function"><span class="hljs-keyword">function</span><span class="hljs-params">(data)</span> </span>{
parser.on(<span class="hljs-string">'data'</span>, <span class="hljs-function"><span class="hljs-keyword">function</span>(<span class="hljs-params">data</span>) </span>{
<span class="hljs-built_in">console</span>.log(<span class="hljs-built_in">JSON</span>.parse(data.toString()))
})
parser.write(fastaData)
parser.end()
<span class="hljs-comment">// { id: 'sequence1',</span>
<span class="hljs-comment">// seq: 'ATGCACGTCACGTCAGTACTCGTCAGTAC' }</span>
<span class="hljs-comment">// { id: 'sequence2',</span>
<span class="hljs-comment">// seq: 'CAGTCCTACTGCATGCATGCATGCATGCATCGATGCATGTCGACTGCATGCATGC' }</span>
</code></pre><p>For a more useful API, check the dependent module:</p>
<span class="hljs-comment">// seq: 'CAGTCCTACTGCATGCATGCATGCATGCATCGATGCATGTCGACTGCATGCATGC' }</span></code></pre><p>To include the reverse complement of the sequences, pass arguments to the parser:</p>
<pre><code><span class="hljs-keyword">var</span> parser = fasta(<span class="hljs-literal">true</span>) <span class="hljs-comment">// Includes the reverse complement for DNA sequences</span>
<span class="hljs-keyword">var</span> parser = fasta(<span class="hljs-literal">true</span>, <span class="hljs-literal">true</span>) <span class="hljs-comment">// Includes the reverse complement for RNA sequences</span>

Example output:
<span class="hljs-comment">// { id: 'sequence1',</span>
<span class="hljs-comment">// seq: 'ATGCACGTCACGTCAGTACTCGTCAGTAC',</span>
<span class="hljs-comment">// rc: 'TACGTGCAGTGCAGTCATGAGCAGTCATG' }</span></code></pre><p>For a more useful API, check the dependent module:</p>
<p><a href="http://github.com/bionode/bionode-fasta">bionode-fasta</a></p>

</div>
Expand All @@ -71,43 +77,118 @@ <h2 id="usage">Usage</h2>
<span class="hljs-keyword">var</span> through = <span class="hljs-built_in">require</span>(<span class="hljs-string">'through2'</span>)
<span class="hljs-keyword">var</span> split = <span class="hljs-built_in">require</span>(<span class="hljs-string">'split'</span>)
<span class="hljs-keyword">var</span> pumpify = <span class="hljs-built_in">require</span>(<span class="hljs-string">'pumpify'</span>)
<span class="hljs-keyword">var</span> BufferList = <span class="hljs-built_in">require</span>(<span class="hljs-string">'bl'</span>)

<span class="hljs-built_in">module</span>.exports = <span class="hljs-function"><span class="hljs-keyword">function</span><span class="hljs-params">()</span> </span>{
<span class="hljs-keyword">return</span> pumpify(split(), parser())
<span class="hljs-built_in">module</span>.exports = <span class="hljs-function"><span class="hljs-keyword">function</span> (<span class="hljs-params">includeRevComp, isRna</span>) </span>{
includeRevComp = <span class="hljs-keyword">typeof</span> (includeRevComp) === <span class="hljs-string">'boolean'</span> ? includeRevComp : <span class="hljs-literal">false</span>
isRna = <span class="hljs-keyword">typeof</span> (isRna) === <span class="hljs-string">'boolean'</span> ? isRna : <span class="hljs-literal">false</span>
<span class="hljs-keyword">var</span> bases = includeRevComp ? getBaseComplements(isRna) : <span class="hljs-literal">false</span>
<span class="hljs-keyword">return</span> pumpify(split(), parser(bases))
}

<span class="hljs-function"><span class="hljs-keyword">function</span> <span class="hljs-title">parser</span><span class="hljs-params">()</span> </span>{
<span class="hljs-function"><span class="hljs-keyword">function</span> <span class="hljs-title">parser</span> (<span class="hljs-params">bases</span>) </span>{
<span class="hljs-keyword">var</span> cacheBuf
<span class="hljs-keyword">var</span> cacheBufLen = <span class="hljs-number">8</span>
<span class="hljs-keyword">var</span> openID = <span class="hljs-keyword">new</span> Buffer(<span class="hljs-string">'{"id":"'</span>)
<span class="hljs-keyword">var</span> closeIDOpenSeq = <span class="hljs-keyword">new</span> Buffer(<span class="hljs-string">'","seq":"'</span>)
<span class="hljs-keyword">var</span> closeSeq = <span class="hljs-keyword">new</span> Buffer(<span class="hljs-string">'"}\n'</span>)
<span class="hljs-keyword">var</span> openID = Buffer.from(<span class="hljs-string">'{"id":"'</span>)
<span class="hljs-keyword">var</span> closeIDOpenSeq = Buffer.from(<span class="hljs-string">'","seq":"'</span>)
<span class="hljs-keyword">var</span> closeSeqOpenRevComp = Buffer.from(<span class="hljs-string">'","rc":"'</span>)
<span class="hljs-keyword">var</span> closeSeq = Buffer.from(<span class="hljs-string">'"}\n'</span>)
<span class="hljs-keyword">var</span> stream = through(transform, flush)

<span class="hljs-keyword">return</span> stream

<span class="hljs-function"><span class="hljs-keyword">function</span> <span class="hljs-title">transform</span><span class="hljs-params">(buf, enc, next)</span> </span>{
<span class="hljs-function"><span class="hljs-keyword">function</span> <span class="hljs-title">transform</span> (<span class="hljs-params">buf, enc, next</span>) </span>{
<span class="hljs-keyword">if</span> (buf[<span class="hljs-number">0</span>] === <span class="hljs-number">62</span>) { <span class="hljs-comment">// If line starts with '&gt;', this is an ID</span>
<span class="hljs-keyword">if</span> (cacheBuf) { <span class="hljs-comment">// If a previous object is in cache, push it</span>
cacheBuf = Buffer.concat([cacheBuf, closeSeq], cacheBufLen+<span class="hljs-number">3</span>)
<span class="hljs-keyword">this</span>.push(cacheBuf)
cacheBuf.append(closeSeq)
<span class="hljs-keyword">this</span>.push(cacheBuf.slice())
}
<span class="hljs-keyword">var</span> id = buf.toString().slice(<span class="hljs-number">1</span>).trim().replace(<span class="hljs-regexp">/"/g</span>, <span class="hljs-string">'\\"'</span>)
cacheBufLen = id.length + <span class="hljs-number">16</span>
cacheBuf = Buffer.concat([openID, <span class="hljs-keyword">new</span> Buffer(id), closeIDOpenSeq], cacheBufLen)
}
<span class="hljs-keyword">else</span> {
cacheBufLen += buf.length
cacheBuf = Buffer.concat([cacheBuf, buf], cacheBufLen)
cacheBuf = <span class="hljs-keyword">new</span> BufferList()
cacheBuf.append(openID)
cacheBuf.append(id)
cacheBuf.append(closeIDOpenSeq)
} <span class="hljs-keyword">else</span> {
<span class="hljs-keyword">if</span> (buf.length === <span class="hljs-number">0</span>) {</pre></div></div>

</li>


<li id="section-3">
<div class="annotation">

<div class="pilwrap ">
<a class="pilcrow" href="#section-3">&#182;</a>
</div>
<p>Ignore empty</p>

</div>

<div class="content"><div class='highlight'><pre> } <span class="hljs-keyword">else</span> <span class="hljs-keyword">if</span> (!cacheBuf) {
<span class="hljs-keyword">this</span>.emit(<span class="hljs-string">'error'</span>, {<span class="hljs-attr">msg</span>: <span class="hljs-string">'Failed fasta parsing'</span>, <span class="hljs-attr">buf</span>: buf})
} <span class="hljs-keyword">else</span> {
cacheBuf.append(buf)
<span class="hljs-keyword">if</span> (bases) { <span class="hljs-comment">// If bases is thruthy then it contains a string with reference bases</span>
cacheBuf.append(closeSeqOpenRevComp)
cacheBuf.append(getReverseCompliment(buf))
}
}
}
next()
}

<span class="hljs-function"><span class="hljs-keyword">function</span> <span class="hljs-title">flush</span><span class="hljs-params">()</span> </span>{
cacheBuf = Buffer.concat([cacheBuf, closeSeq], cacheBufLen+<span class="hljs-number">3</span>)
<span class="hljs-keyword">this</span>.push(cacheBuf)
<span class="hljs-function"><span class="hljs-keyword">function</span> <span class="hljs-title">flush</span> (<span class="hljs-params"></span>) </span>{
<span class="hljs-keyword">if</span> (cacheBuf) {
cacheBuf.append(closeSeq)
<span class="hljs-keyword">this</span>.push(cacheBuf.slice())
}
<span class="hljs-keyword">this</span>.push(<span class="hljs-literal">null</span>)
}

<span class="hljs-function"><span class="hljs-keyword">function</span> <span class="hljs-title">getReverseCompliment</span> (<span class="hljs-params">buf</span>) </span>{
<span class="hljs-keyword">var</span> seq = buf.toString()
<span class="hljs-keyword">var</span> complement = Buffer.alloc(seq.length)
<span class="hljs-keyword">for</span> (<span class="hljs-keyword">var</span> i = <span class="hljs-number">0</span>; i &lt; seq.length; i++) {
<span class="hljs-keyword">var</span> baseIndex = bases.indexOf(seq[i])
<span class="hljs-keyword">if</span> (baseIndex &gt; <span class="hljs-number">-1</span>) {</pre></div></div>

</li>


<li id="section-4">
<div class="annotation">

<div class="pilwrap ">
<a class="pilcrow" href="#section-4">&#182;</a>
</div>
<p>If the base was found in the base reference, pick the base 4 positions to the right, which is the complementary base</p>

</div>

<div class="content"><div class='highlight'><pre> complement.write(bases[bases.indexOf(seq[i]) + <span class="hljs-number">4</span>], i)
} <span class="hljs-keyword">else</span> {</pre></div></div>

</li>


<li id="section-5">
<div class="annotation">

<div class="pilwrap ">
<a class="pilcrow" href="#section-5">&#182;</a>
</div>
<p>If no match was found (e.g. if specifying that a DNA sequence is RNA), set the complementary base to 0</p>

</div>

<div class="content"><div class='highlight'><pre> complement.write(<span class="hljs-string">'0'</span>, i)
}
}
<span class="hljs-keyword">return</span> complement
}
}

<span class="hljs-function"><span class="hljs-keyword">function</span> <span class="hljs-title">getBaseComplements</span> (<span class="hljs-params">isRna</span>) </span>{
<span class="hljs-keyword">return</span> isRna ? <span class="hljs-string">'AGCUUCGA'</span> : <span class="hljs-string">'ATGCTACG'</span>
}</pre></div></div>

</li>
Expand Down
Binary file added docs/public/fonts/roboto-black.eot
Binary file not shown.
Binary file added docs/public/fonts/roboto-black.ttf
Binary file not shown.
Binary file added docs/public/fonts/roboto-black.woff
Binary file not shown.
8 changes: 5 additions & 3 deletions fasta-parser.min.js

Large diffs are not rendered by default.

44 changes: 41 additions & 3 deletions lib/fasta-parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,16 @@
// // { id: 'sequence2',
// // seq: 'CAGTCCTACTGCATGCATGCATGCATGCATCGATGCATGTCGACTGCATGCATGC' }
//
// To include the reverse complement of the sequences, pass arguments to the parser:
//
// var parser = fasta(true) // Includes the reverse complement for DNA sequences
// var parser = fasta(true, true) // Includes the reverse complement for RNA sequences
//
// Example output:
// // { id: 'sequence1',
// // seq: 'ATGCACGTCACGTCAGTACTCGTCAGTAC',
// // rc: 'TACGTGCAGTGCAGTCATGAGCAGTCATG' }
//
// For a more useful API, check the dependent module:
//
// [bionode-fasta](http://github.com/bionode/bionode-fasta)
Expand All @@ -39,14 +49,18 @@ var split = require('split')
var pumpify = require('pumpify')
var BufferList = require('bl')

module.exports = function () {
return pumpify(split(), parser())
module.exports = function (includeRevComp, isRna) {
includeRevComp = typeof (includeRevComp) === 'boolean' ? includeRevComp : false
isRna = typeof (isRna) === 'boolean' ? isRna : false
var bases = includeRevComp ? getBaseComplements(isRna) : false
return pumpify(split(), parser(bases))
}

function parser () {
function parser (bases) {
var cacheBuf
var openID = Buffer.from('{"id":"')
var closeIDOpenSeq = Buffer.from('","seq":"')
var closeSeqOpenRevComp = Buffer.from('","rc":"')
var closeSeq = Buffer.from('"}\n')
var stream = through(transform, flush)

Expand All @@ -70,6 +84,10 @@ function parser () {
this.emit('error', {msg: 'Failed fasta parsing', buf: buf})
} else {
cacheBuf.append(buf)
if (bases) { // If bases is thruthy then it contains a string with reference bases
cacheBuf.append(closeSeqOpenRevComp)
cacheBuf.append(getReverseComplement(buf))
}
}
}
next()
Expand All @@ -82,4 +100,24 @@ function parser () {
}
this.push(null)
}

/**
 * Build the complement of one line of sequence data, character by character.
 *
 * Reads `bases` from the enclosing scope: a lookup string in which the
 * complement of the base found at index k is stored at index k + 4.
 * Characters that do not occur in `bases` (e.g. DNA parsed with the RNA
 * lookup) are written as '0'.
 *
 * NOTE(review): despite the name, the output keeps the input order (byte i
 * is the complement of character i); nothing is reversed here. Confirm
 * whether that is the intended result before relying on `rc`.
 *
 * @param {Buffer} buf - One line of sequence characters.
 * @returns {Buffer} Buffer holding one byte per character of the decoded line.
 */
function getReverseComplement (buf) {
  var seq = buf.toString()
  var complement = Buffer.alloc(seq.length)
  for (var i = 0; i < seq.length; i++) {
    // Look the base up once and reuse the index for the complement lookup
    var baseIndex = bases.indexOf(seq[i])
    complement.write(baseIndex > -1 ? bases[baseIndex + 4] : '0', i)
  }
  return complement
}
}

/**
 * Pick the base/complement lookup string for the requested nucleic acid.
 *
 * In each returned string the first four characters are the bases and the
 * last four are their complements in the same order.
 *
 * @param {boolean} isRna - Truthy selects the RNA lookup, otherwise DNA.
 * @returns {string} 'AGCUUCGA' for RNA, 'ATGCTACG' for DNA.
 */
function getBaseComplements (isRna) {
  if (isRna) {
    return 'AGCUUCGA'
  }
  return 'ATGCTACG'
}
45 changes: 45 additions & 0 deletions test/fasta-parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@ TCAAAGATTCGGAATGCTGTCTGGAGGGTGAATCTAACGGTGCGTATCTCGATTGCTCAGTCGCTTTTCGTACTGCGCGA
AGTTCGTACCGCTCATTCACTTGGTTCCGAAGCCTGTCCTGATATATGAATCCAAACTAGAGCGGGGCTCTTGACATTTGG\n\
AGTTGTAAATATCTAATACTCCAATCGG\n`)

var fastaDataShortDNA = Buffer.from(`>contig1\n\
TCACCAACTACGA\n`)

var fastaDataShortRNA = Buffer.from(`>contig1\n\
AGCUUCAAC\n`)

test('Should parse a FASTA Buffer to a JSON Buffer', function (t) {
t.plan(1)
var result = []
Expand All @@ -29,3 +35,42 @@ test('Should parse a FASTA Buffer to a JSON Buffer', function (t) {
parser.write(fastaData)
parser.end()
})

// With the first argument set to true (DNA lookup by default), the parsed
// record must carry an `rc` field. The expected value is the per-base
// complement of the short DNA fixture, in the same order as the input.
test('Should include reverse complement if specified', function (t) {
  t.plan(1)
  var records = []
  var stream = fasta(true)
  stream.on('data', function (chunk) {
    records.push(JSON.parse(chunk.toString()))
  })
  stream.on('end', function () {
    t.deepEqual(records[0].rc, 'AGTGGTTGATGCT')
  })
  stream.write(fastaDataShortDNA)
  stream.end()
})

// With both arguments set to true the RNA lookup is used, so the short RNA
// fixture must come back with its U-based complement in `rc`.
test('Should handle reverse complement of RNA sequences', function (t) {
  t.plan(1)
  var records = []
  var stream = fasta(true, true)
  stream.on('data', function (chunk) {
    records.push(JSON.parse(chunk.toString()))
  })
  stream.on('end', function () {
    t.deepEqual(records[0].rc, 'UCGAAGUUG')
  })
  stream.write(fastaDataShortRNA)
  stream.end()
})

// Feeds the DNA fixture through a parser configured for RNA: every base that
// the RNA lookup does not know (here 'T') must show up as '0' in `rc`.
test('Should replace invalid bases with 0', function (t) {
  t.plan(1)
  var records = []
  var stream = fasta(true, true)
  stream.on('data', function (chunk) {
    records.push(JSON.parse(chunk.toString()))
  })
  stream.on('end', function () {
    t.deepEqual(records[0].rc, '0GUGGUUG0UGCU')
  })
  stream.write(fastaDataShortDNA)
  stream.end()
})