Skip to content

Commit

Permalink
add Dataset
Browse files Browse the repository at this point in the history
Signed-off-by: Zhiyuan Chen <[email protected]>
  • Loading branch information
ZhiyuanChen committed May 2, 2024
1 parent 183a11b commit a9f4845
Show file tree
Hide file tree
Showing 8 changed files with 325 additions and 2 deletions.
8 changes: 6 additions & 2 deletions multimolecule/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from . import models, tokenisers
from . import data, models, tokenisers
from .data import Dataset, PandasDataset
from .downstream.crispr_off_target import (
RnaBertForCrisprOffTarget,
RnaFmForCrisprOffTarget,
Expand Down Expand Up @@ -54,9 +55,12 @@
from .tokenisers import RnaTokenizer

__all__ = [
"models",
"data",
"Dataset",
"PandasDataset",
"tokenisers",
"RnaTokenizer",
"models",
"RnaBertConfig",
"RnaBertModel",
"RnaBertForMaskedLM",
Expand Down
3 changes: 3 additions & 0 deletions multimolecule/data/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .dataset import Dataset, PandasDataset

__all__ = ["Dataset", "PandasDataset"]
93 changes: 93 additions & 0 deletions multimolecule/data/dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
from __future__ import annotations

from collections.abc import Sequence

import danling as dl
import datasets
import torch
from pandas import DataFrame
from tokenizers import Tokenizer
from transformers import AutoTokenizer, PreTrainedTokenizerBase


class Dataset(datasets.Dataset):
    """A `datasets.Dataset` that tokenizes its string columns and yields tensors.

    Call `post` after the underlying dataset has been constructed; it detects
    the string-typed ("sequence") columns, resolves the tokenizer and installs
    the formatting transform.
    """

    # Tokenizer used to convert sequence columns into token ids.
    tokenizer: PreTrainedTokenizerBase | Tokenizer
    # Names of the columns that hold (tokenized or to-be-tokenized) sequences.
    sequence_cols: Sequence
    # Whether the first sequence column is renamed to "input_ids".
    rename_sequence: bool
    # Whether tokenization is done once up front (True) or on access (False).
    preprocess: bool

    def post(
        self,
        tokenizer: Tokenizer | PreTrainedTokenizerBase | None = None,
        pretrained: str | None = None,
        preprocess: bool = True,
        rename_sequence: bool | None = None,
    ):
        """Finish initialisation: pick sequence columns, tokenizer and transform.

        Args:
            tokenizer: Tokenizer instance to use. Takes precedence over `pretrained`.
            pretrained: Name or path passed to `AutoTokenizer.from_pretrained`
                when `tokenizer` is not given.
            preprocess: If True, tokenize the whole dataset once via `map` and
                only convert to tensors on access; otherwise tokenize on access.
            rename_sequence: Whether to rename the first sequence column to
                "input_ids". Defaults to True only when there is exactly one
                sequence column.

        Raises:
            ValueError: If neither `tokenizer` nor `pretrained` is specified.
        """
        self.preprocess = preprocess
        # Every feature whose dtype is "string" is treated as a sequence column.
        self.sequence_cols = [k for k, v in self.features.items() if v.dtype == "string"]
        if rename_sequence is None:
            rename_sequence = len(self.sequence_cols) == 1
        self.rename_sequence = rename_sequence

        if tokenizer is None:
            # Validate before calling `from_pretrained`: passing None there fails
            # with an unrelated error instead of this explicit message.
            if pretrained is None:
                raise ValueError("Either tokenizer or pretrained must be specified")
            tokenizer = AutoTokenizer.from_pretrained(pretrained)
        self.tokenizer = tokenizer

        if self.preprocess:
            self.update(self.map(self.tokenization))
            self.set_transform(self.torch_transform)
        else:
            self.set_transform(self.tokenize_transform)

        if self.rename_sequence:
            self.update(self.rename_column(self.sequence_cols[0], "input_ids"))
            # Keep any remaining sequence columns registered so the transforms
            # still treat them as sequences after the rename.
            self.sequence_cols = ("input_ids", *self.sequence_cols[1:])

    def update(self, dataset: datasets.Dataset):
        """Copy the state of `dataset` into this instance in place.

        Args:
            dataset: Dataset whose format columns, data, info and fingerprint
                replace the ones held by `self`.
        """
        # pylint: disable=W0212
        # `datasets` operations return new objects rather than modifying in
        # place, so the private state is copied over manually.
        self._format_columns = dataset._format_columns
        self._data = dataset._data
        self._info = dataset._info
        self._fingerprint = dataset._fingerprint

    def tokenization(self, data):
        """Return the token ids of every sequence column in `data`."""
        return {col: self.tokenizer(data[col], return_attention_mask=False)["input_ids"] for col in self.sequence_cols}

    def torch_transform(self, batch):
        """Convert an already tokenized batch into tensors.

        Sequence columns become `dl.PNTensor`, all other columns `torch.Tensor`.
        """
        return {k: dl.PNTensor(v) if k in self.sequence_cols else torch.tensor(v) for k, v in batch.items()}

    def tokenize_transform(self, batch):
        """Tokenize a raw batch on access and convert it into tensors.

        Sequence columns are tokenized and wrapped in `dl.PNTensor`; all other
        columns are converted with `torch.tensor`.
        """
        return {
            k: (
                dl.PNTensor(self.tokenizer(v, return_attention_mask=False)["input_ids"])
                if k in self.sequence_cols
                else torch.tensor(v)
            )
            for k, v in batch.items()
        }


class PandasDataset(Dataset):
    """`Dataset` built from a pandas `DataFrame`, a dict, or a file path."""

    def __init__(
        self,
        dataframe: DataFrame | dict | str,
        split: str,
        tokenizer: Tokenizer | PreTrainedTokenizerBase | None = None,
        pretrained: str | None = None,
        preprocess: bool = True,
        rename_sequence: bool | None = None,
    ):
        """Build the dataset and run `post` on it.

        Args:
            dataframe: A `DataFrame`, a dict convertible with
                `DataFrame.from_dict`, or a path that is loaded with `dl.load`.
            split: Split name forwarded to the parent constructor.
            tokenizer: Forwarded to `post`.
            pretrained: Forwarded to `post`.
            preprocess: Forwarded to `post`.
            rename_sequence: Forwarded to `post`.
        """
        if isinstance(dataframe, str):
            # NOTE(review): `dl.load` presumably returns a DataFrame or a dict
            # depending on the file type - confirm against danling.
            dataframe = dl.load(dataframe)
        if isinstance(dataframe, dict):
            dataframe = DataFrame.from_dict(dataframe)
        # Drop columns whose label starts with "Unnamed". Labels are cast to
        # str first because the `.str` accessor rejects non-string labels
        # (e.g. integer column names).
        unnamed = dataframe.columns.astype(str).str.contains("^Unnamed")
        dataframe = dataframe.loc[:, ~unnamed]
        table = datasets.table.InMemoryTable.from_pandas(dataframe, preserve_index=False)
        super().__init__(table, split=split)
        self.post(tokenizer, pretrained, preprocess, rename_sequence)
68 changes: 68 additions & 0 deletions tests/data/datasets/rna/5utr.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
,seq,label
0,ACCAACATGTAATTTCCACTCTTGA,-1.7812239923226838
1,TGGTAAAATCTAGGGTTTTTTATAA,-0.275642799200671
2,CAAAAAGTAGACGCAACATGAAAAA,-1.1482258696972798
3,TGGCCTCGTGGATAGGACATTTGGA,-1.163470221256106
4,TAATCGGTTCTAAATACGATTAGTG,0.629766441913879
5,TAAAGAAGAGGTTGATGAGAAACCG,-0.0566107803048019
6,AAGCGGTGAATAACACACAGTAAAG,0.4379278257655004
7,ATCTCTCTAGTACAGATTGTCAATA,0.6259435716867096
8,AGACAGCTAAAACCCTACAAAATAA,0.4289960198091346
9,CCTTCGACGACCCACGTCCGCCTTA,0.0669630515711629
10,CGTTGATCATGGATACTTTTTTACA,-1.4310155058894878
11,GTACGCAAACCATCTCTCGATTTCT,0.3852448242284625
12,GTTACCCCCTACTCCAGCTCATACT,0.1064178675873167
13,TCCAATCTTTTGCACCACCCCTAGG,0.1675831224155592
14,CTCCCTCAACAGGTGCCTCACGCTG,0.4482936797119086
15,AGTAATGAGTTTCGGCATTTCAAAG,0.4487089779004714
16,AGGATTGTGTCGCCAGTTCCACTGA,0.2266517419179321
17,TAATATCATATAGTTCTTCTCCCCT,0.0910128870657181
18,TAGAATCGGAAGGAATAGGATTCTA,0.6830431344740635
19,GATGCTTGCACTCGAGGTCCGTGCA,0.7779586432740309
20,GACACCACGTAAAATCCTAATCAAA,0.7227716269767893
21,TCTATGACTCGTTCGCGTAGAATCA,-0.9032312196091278
22,CAAAATGATAAGATGGACCAAAGAT,0.0447788302836263
23,TGCATGATCTGTAGCATTTGCTGCT,-0.2827116234508076
24,GCATGACCAGCCTGTTTAGATAGAA,-0.8848997916225679
25,AGAAAGATAACAAACCACCCGTATG,0.5311009416510439
26,GACCCCTTTACGCAACCTATTGAAC,0.7381938795002485
27,GCCCCTACACTCTGTTTTTTGATCC,0.4626627635505125
28,GGATAAATAAATCTGAGATCAGAAA,0.5764032765766933
29,CCCTGTTGCCAGCCGCATAATCATC,0.5072462083719866
30,GCAGCACGCTTACAGTCCCTCAGAC,0.5885318168197583
31,CTTTTTCCTTACTCGTGATACTATC,0.3549980256557335
32,GTAAACCCAGATCTAGTTTGACTGT,0.4340458948389251
33,CACGCTGCACACCGAACAGCCCAAA,-0.0060640299062117
34,ACTCCGACACCATCTTCATTACAAT,0.40927053992064
35,TACATGGAACTGTCCCTTCTTACCG,-0.8422048835483932
36,GACCCTCCTATTATCAACCAAGATA,0.2085787716855296
37,AGAGTGAGAGCGCGACAAATCACTG,0.677525749419415
38,CCGATTGGCGCCCTTTGGCCGGGAG,0.0662045936850974
39,GAGATGAGAAGTCGTGCGAAATAAC,-1.5323635165013456
40,GTCCTCGCGACAACTGTCCCAAACC,0.2904891214718897
41,TTCTGATCGGTGTTCCTCCGTTCTG,0.4886688832278358
42,TCTAGTCGTTTCTAGCATAGACTATA,0.6682424782790564
43,GCAATGCATCCATTCCAATGCCTACT,-1.1913139865591946
44,ACCTTGCCGCATCCCACTTGCCTGCA,0.458991405155542
45,CAAACTGGGCCCATTTCTATACCAAT,0.1781652824101883
46,ACGAGAGTAACAGATCCAACCTAAA,0.6019488593566376
47,CCTACGCGGGATGCTCTTTTTTATAG,-1.1687525556467426
48,GATCCAGGAGACAGAAACCATCTACC,0.4738692979644047
49,TCGCAAAGAAGAACCTATTTTAAGA,0.7018982636372705
50,TATAATTACGCTTTTCCGTGTATGG,-0.3720488657064282
51,TCAATTACAGCTCGACTTCCATGATC,0.2754572607942968
52,AAGCCGTTCTTTAAATCCACACATTT,0.2832481855967742
53,AGTCCATCCTCGCGGCCTCACACCA,-0.1678433893986053
54,AGTCCCGTCCTACACGCTCGGTCCG,0.3135193265556327
55,CCCCATATCCGATTATCTGCTGGAC,0.5673113165112577
56,CGTAGTGGCGCAGGACCGTCAATTA,0.3736517875688682
57,CTCTGCTATGCCCCACCACTCAACA,0.5126163959293235
58,ATCCACCAATCCCTACATTCATCTTC,0.5112259267226038
59,GAGAGTGTCGCCGAAGCACAAGCCGA,0.4693891586433297
60,CCTGTCGATCTAGGTCCTATTGTCCG,0.6496399244427643
61,ATTTCTAACTTCTTCTGGCAACGACA,0.5061690522661538
62,ATATACGGCAACACGCCCGAACCAGA,0.2119265981391584
63,CCTCGTTAATCCTTCCCTTGTCTCCC,0.1640642263497583
64,TCCCCGCCACGCCCGGTATCCGACTA,-0.0315210929562356
65,GAAACTCGTGTTTATTCTCGTCGAT,0.7040646602119047
66,AAGAAAACATACAAGTCTGTTCACT,0.6293633976161706
1 change: 1 addition & 0 deletions tests/data/datasets/rna/modification.json
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"sequence":{"0":"TTGCCACACTGCTGGACGCCTGCAAGGCCAAGGGTACGGAGGTCATCATCATCACCACCGATACCTCGCCCTCAGGCACCAAGAAGACCCGGCAGTATCTC","1":"TTTGAAAAAATATTAGCAATGTGAGGACACTTAAGCAGTTTTGTCAATTCAGCTGAATCCAGCCTCATAGCAAAATCTGGTCTTAAATTCCCTCATCGTGC","2":"AGAAACATTCAACCTCCCTTCTTTTTATTCCAGTTGTCCTTTTCTCTGACACTTGCATCAATTTTCTGATTGCCTAGGCTCTTAATATTGCTTTCTGTTCA","3":"TTAGTTTTACTATGGAATCATAATAACCCACATAGAAGACTGATATTAAGAGCACAGAAGAAATAGTCCCAATGTTTATGTCATTTAATTTGAAAAATTTC","4":"CAACAGAAGTTTCTCATCTATAATCAGTAGCACTAAACTCTTGGTTTGAAAAATATTTAGTATGGGTAATACTTGGAGTATCAGTTTTCATTAAAATGTAC","5":"AATGTGTTTGTGTGTGTCTCTCACACACACACATAACATGTACATACCTGAAACTCATACTGCAATTGCAACACATCTTAAGTTTTTCCTTTTAAACATAC","6":"AAGTAGAAGACAACAGTACTCTTTTTTTTTTGAAATGGAGTCTCACTCTCACCCAGGCTGGAGTGCAATGGTGTGATCTCGGCCCACTGCACTCCAGCCTG","7":"GCCACCACACGTGGCCACAGTTTGGGCTTTTGAAAAAAGTTAGGTGGAGGAAGAGAGGTATGAGTACTCTAGTTTTCACTGCAGTATCCCATTTGTGTGTG","8":"AGGCTGTTTTAGCTTAAGTAAAATTTAAAAATTAGTTCCTTAGTCACATTAGCCACATTTAATGTGTTCTATAGCCACGTGTGACTGGTGGCTAACATATT","9":"GCAAGTGGTGTTTGGTTACATGAATAAGTTCTTTAGTGGGGATTTCTGAAATTTTGGTGCACCCATCACCTGAGCAGTGTACACTGTATCCAATGTGTAGT","10":"TTCTCAGGATATGTTATAGGATTCTTCTGACCACTAGAGTAGAGTGAACGATATGTTTTAATGTTCAGAAGTCACTATGGAGTAAACCAAATATATATAGG","11":"TTTTCCAGGATTTCATGAAACAAAGAGTTAAGAACTACAGTAGTGGAGCAATATTCATGGTGCTTTTTCTTTTTCTTTTGAAATAATTAAAAACTTACAGA","12":"GTATTGTCGTCTCACTCTATTATCAGCCTACCTCCGGTGGCCCTTGGGGCATGTGGCTGGGCCCAGGGTGATTCATCTAGAGCCAGCTCAGGTGGCAGTGA","13":"GGTTTTTTTTTTTTTTTTTTAGTCCATCCATTCTTTGATTTAATTTGGCAAACCCACATTAGATAATTTAGCAGAAGAGGAATTATATCTTCATCCTATTA","14":"AAGAAACCTGAACCAAGGCCTTGGGTATCAGATTGGCTGGATAAGGAGGGATGAGCACAGAAGGAAGGACAAAGATAATACCTTTTTCAAGATGAGCCTGT","15":"CTCATTTTGTAAGGAGACACTTAGATGCATTTCTGAAAAAAACAAAACAAAACAAAACAAAACAAAAAACACTTTGGGCTTTCTCTGTATTCTTCAAGCAT","16":"GTAAGTGAGATTACTTTATTTATTTCTTTTTCAGATTGTTCACTGTTGGCATATAGAAATGCTACTAATTGTTGTATGTTGATTTTGTATCCTGAAACTTT","17":"CATGCCTGTAATCCCACCTACTCAGGAGGCTGACGCAGGAGAATTGCTTGAATCCGGGAGGTGGAGGTTGCAGTGAGCCAAGATCACGCCACTGCACTCCA","18":"GGGTCCAGCCCAGGCTGTTTGGTCCCAGA
GCCTGTGCTCTTGTCCATTATACTGGTGGTATTGCCCCTGGCATTGACAAAGTGGGAAAAGATGACTAACCT","19":"TGGCTCACACCTGTAATCCCTGCACTTTGGGAGGCCAAGGTGAGCAGATCACTTGAGGTCAGGAGTCTTGAGACCAGCCTGGCCAACATGGTGAAGCCCTA","20":"ATATGAATGATTTGTCATTTATGTCTAATCACTAAGTAAAAATATCAATTATGATTACTTTTTAAGTTTTATTGATGCATAATTATACATATTTATGGGGT","21":"ACATCAAAAAGTTTGAAAGAGCACAAATAGACAACCAAGGGTCACACGTCATGGAACTGGAGAAACAAGAACAATAGAAACCCAAACCTAGCAGAAGAAAA","22":"CTATCAGAAATAATGAAAAAACTCACCTTTGGGATTTTCATTAGTTTGGCAATCACTTCTCCTTTTGAAAGATTGGTGGACTGTACATTTTATTATTATTA","23":"TGATCTTATTTGTTTCTGTGTCTTGAAATAGTTTGCTGTTTTGTCATCTTAGAAATTGATTCATTATTAACTCATTTATTCTCAACTATGCTAAAAAAAAG","24":"AAACCACAAAGATGGGGAGAAACCAGAGCAGAAAAGCTGAAAAGTTCAAAAAACCAGAGCACCTCTTCTCCTCCAAAGGATTGCAGTTCCTCACTGCAAAG","25":"CTACACAAGGTATTTCACAATATCCTTAGGAATTACTGAGTTTTAGAGTGACAGAATAATTACCAATTATTCTGATAGTAAATTTGTAGGTACATTATAAT","26":"GCTACCTCTACTTTTAACATATTTTAGGCATTAGGACTTGCTTAGCCTTTAATACACAGGAATATTAACTAAAATGCACATATAAAACAATTGGTTAGACA","27":"CCTGGATCTAAAAGTGTTTTTATTTTTTGTGCCCACATCTGTAGTCATGGATTTGATGTATATATTTAATAACATTCAGTGATTTATTTTTCGGTTCACCT","28":"TCTGAAGTCATAGTCCCTTGGTTTTCCCTGACCTGCCTGCTACTGCGCCCACTTGCAGCAGCACCTCCGTTGCCCAGTGAAGCATGCTGCCCTGGTCTTAC","29":"CTGGGGCGGGCGGGTCAGTTGAGGCCAGGAGTTCGAGACCAGCCTGGCCAACGTGGCAAAACCCTGTCTCTACTAAAAATACAAAAAAGTTAGCTAGGCGT","30":"AATATTGCATGGGCCATACTTATATTTTTAAAATATTCATTGTTTATCAGAATTCAAATTTAACTGGGCATCCTGTATTTTTATTAGCTAAATCTGGCAAC","31":"GACTAGCTGCAGAAAGTGACATTTACACTGGGACAGGAGTCAAAGAGTATATTGATGCAAAGGAAAGACCATGAATTAGACCTGAGTTCAAATCCTAGCCG","32":"AGAAAAAGACAGAGGTTTATAGAAGTTTTTTCCACAAAATTTATTTGTGCATTAATCGATAGGCAACATAGTGTAAAACATAGCTAGCTGAATATTCAGAA","33":"TGCCACTATTGGGGTAACCCACCCCCAATATTACAACATAGGTTCTTTCTATTTTCCATAAGTGTTGGCTGGCTGAGAAATAAAGAGAAAGAGTACAAAGA","34":"TGGAAGGAAGAATTGCTTTTCTGAGGTCAATGCTCAGCTTGGCTGTTGGCAAGTCAACCTTTAGGAATCTGTGTATTCAGGGTATAGCAGTGGAAGTATAG","35":"AAAATCAGCAGCTAGTATTTGCAAATGGTGTTTGTATTTACTCTTGAAATACATGGTTTTGTGCTGGAGATTTGGAGTAAGGAAACTTAGGCACTATAGTC","36":"TCCACTTGCTGCATTATTTTTTTCTTTCTTTTTTTTTGCTGATTATTTTTATATGAATGTTAAATGA
TAAAGTCTTCTACATCATATCCCATTTAAGCTGC","37":"TGTTTTTACATTGAAAGTAGACAAATAGTTTTGTCATCTGTTTCTCATCCATTTCTAATATTTAAATATAATAAAGTCTAATTGAATACAAAAACAAACAA","38":"AAAGGATGACGAAGTGTAGAGAAGAGGCCAGCCATAGGAAAAGGGGAGTCACTTATGGGAAGGTGACTAGGAAATGTGTGATATACAGGGGTTGTTAGTAA","39":"GGGCCGTCCTGAACACTGCCACCTCTGAGCGTTGGCATCCATCTGCTAGGATTAGCATTGGAGCTTTTTTTGAAGGTATTTTGAAGTCTAATGGGAGAGGA","40":"TCCCCAGGCTGGAGTGCAATGGCACAATCACAGCATACCTCCCAGGCTCAAGCAATCCTCCCACCTCAGCCTTTTGAGTAGCTGGGACCAGAAGCACGTGC","41":"ATTATGGCCCAGCCTATACCCAGAAGAGAGGACTTAACTTGTGCTCCATGAACCACTGTGTCTGGGACACTGAGTAACCTAAGAATTTTCTTTGATATGAC","42":"TCAGCCTCCCGAGTAGGTAGGATTACAGGCATGCGCCACCATGACCGATTAATTTTGTATTTTTGGTAGAGACGGGGTTTCACCATGTTGGTCAGGCTGGT","43":"AAATTCATTTTTTCAATCATTTAAGGAACTTAGATATAAAATACACCTTTAATTCACCTTTGGAAATTTTTTACAAAGTGTTTTATTTGCAAATGACAGTG","44":"ATTAGTTATTTCAGTGTTTATTTCATTTGATGAAGAAACGTTTGCATATGAATGTTGGGAATTCTAGCAGGTCCTGCCTCAATGTGAAGAGGCATTTTTTT","45":"CAGGTGCCTGCCACCATGCCTGGCTTATTTTTGTATTTTTAGTAGAGACAAGGTTTCACCAGGTTGGCCACTCCTGGTCTTGAACTCCTGACCTCAGGTGA","46":"TTTTTTTTTTTTTTTTTTTTACTGTGTCCCAGGCTTAAGAAAAAAGTGATACATGATGTGGGATTAAAATCAAGAACATCATTGAACTTCACCTTCCCTCC","47":"CGGGAGGCACGGGCCCTTCGGGGATGACGTCACGGGCGGGGGCCCCGGACACGCGAGCCTTGCGCCCCACAGACGGCGGCGCAGCCCGCCGCCCTTTTCGA","48":"TGAGGCTTAAGTGATCCTCCCACCTTAGCCTCCTAAGTAGCTGGGAGTACAAATGCACACCACCACACCTGGCTAATTTTTGTATTTTTTGTTTTGCCATG","49":"ACTCATAGCTCTATGTCTCTTATAGTTCTTAGCACAATATCTTGGCCTAGATGAAGTACATAATAATTATATGTAGGGTTGTGGAAAGCAGTGCTGGCTTT","50":"GGCTCCTTCGGAGGCAGAATATGTCAACTCGTTGGCTTCTCACAAAATCAAGTGAGTCAGAAACCTGAATGGGGTTTCGGCTGGTCTCACCTAATTAACTT","51":"TATCTACCACCTGGATTCTACAACTGACATTTTATTATACCTAGTTTTTTACATGTCTGTCCATCTGTCTCATCCATAGATCCATTTTATTTCTTTATACA","52":"CATGTATGTATACTTAACTAAGTTAATAAAAACTGTCCTATTTCTCCTGGACATTAGAGAGATCTCAGAACTCTTTAACTCCGTGTACCCACCTCCTGACT","53":"GGAGCTGGTTCAGGAGATCACACAACATTTATTCTTCTTACAGGTACATCAGTCAAGGCTACCCCCCAGTTCTGAGAGAACTTGCCCAGGAGTGGTTGCAG","54":"TTCCTGGTTGGTTGAATCACTGGATGCGGTACCCACGGATGCAGAGAGTGACTGTACAGAAAAAAAGCATCTATTGCCTTTCCAGGCCAAGCTTTCTGTCT","5
5":"ACATTTTAGAAAATAAAATGCACCGAACAAACATGGGGTGTTCCTACCGCAGCATGGGAAAGGCGAGGCGCCATCCCACCAAGGCGGGTGTGGTTTTGAGC","56":"GAACGAAAAGAGGAAGTAGTGAGTGAAAAGGAAAGAAGAAAACATTAAGAAGTAGAGGAAAAAGAATTAAGTCGATTAGATGCAATGAGGGAAGAGGAAAA","57":"GAGAAACAGTGACAAATTCTGAGGGGAGCCTACAGTGTATAGTGTTGTGTATAGTGTGTATAGTATATAGTGGTTGTGTATAGTGGCCTCTGCCTTTTACC","58":"CCTTGCCAATCCCCATGAAAATGTTCAGTTATGTCAAAAGCAAGGCAAAAACAGTCTCTTGGCTATACAAGGGTAGCTGTTTTATTTGACTAAAATTTAGC","59":"ATTGTAGTGCAAAGCAGCCACAGACAAAATTTAAATGAATGAACCTGGCCATATTCCAATAAAATGAATTTGAATTTCAAATAATTTTTATGTGTCATAAA","60":"TGAGAAGAAAGAAAGAAAGAAAAAGAGGGGGGGGAGGGAGAGAGAGAGAGAGAAAGGAAGGAAGGAGAAAGAAGAAAGGGAGAGGGAGAGAGAGAGAGAGG","61":"AGTACTTTCAACACTGCATGGCACATAGTAAGGGCACAATAAATGTTAATAATTATGATGGTGGTCATGATGATGATGATCATATGCTTATCTTCCATCCC","62":"GACTCTGTCACCCCCCGCCCCCTGGAAAAAATGCGTTTTTTGACTTAATGATATTTTCAATTGTGATGGGTTAATTGAGATATCACCCCACTGTAAGTTTA","63":"CATATCTCATATTTACAGATTCCTTCAGGGTAAGAAAACTTATGTCTTCTAGGGAAACCACTCCTTTTAAATCTATGTGATTTATCCTATAAGCCACTTAA","64":"AATTTAAAAAGTGTTAAGCACCATAGATGTGCATTTTTAGGAATAAGATGAGTTATTCACTGAAGAAGAGCTCTGCAGGAAGGTGAAAGCTCTCCTTTAAA","65":"ATGGGTTTTGGATTTAATGGGGCATTGGGGGAGTGAGAGGGCATCTGCAGAAAAGAGCCATCCAGGCTGCAGAACTCTTGTTTCCAGCAAATAGTCCATTG","66":"AGATACCAGGAATGACCTGATTCAGGCTAGTAAGTGACGTTTGCCTAGAGATCAGTCTAACTGGGGCTCAAGATATGGCCTAGCTGTGAAACAACAGATGA"},"label":{"0":[1,0,0,0,0,0,0,0,0,0,0,0],"1":[1,0,0,0,0,0,0,0,0,0,0,0],"2":[1,0,0,0,0,0,0,0,0,0,0,0],"3":[1,0,0,0,0,0,0,0,0,0,0,0],"4":[1,0,0,0,0,0,0,0,0,0,0,0],"5":[1,0,0,0,0,0,0,0,0,0,0,0],"6":[1,0,0,0,0,0,0,0,0,0,0,0],"7":[1,0,0,0,0,0,0,0,0,0,0,0],"8":[1,0,0,0,0,0,0,0,0,0,0,0],"9":[1,0,0,0,0,0,0,0,0,0,0,0],"10":[1,0,0,0,0,0,0,0,0,0,0,0],"11":[1,0,0,0,0,0,0,0,0,0,0,0],"12":[1,0,0,0,0,0,0,0,0,0,0,0],"13":[1,0,0,0,0,0,0,0,0,0,0,0],"14":[1,0,0,0,0,0,0,0,0,0,0,0],"15":[1,0,0,0,0,0,0,0,0,0,0,0],"16":[1,0,0,0,0,0,0,0,0,0,0,0],"17":[1,0,0,0,0,0,0,0,0,0,0,0],"18":[1,0,0,0,0,0,0,0,0,0,0,0],"19":[1,0,0,0,0,0,0,0,0,0,0,0],"20":[1,0,0,0,0,0,0,0,0,0,0,0],"21":[1,0,0,0,0,0,0,0,0,0,0,0],"22":[1,0,0,
0,0,0,0,0,0,0,0,0],"23":[1,0,0,0,0,0,0,0,0,0,0,0],"24":[1,0,0,0,0,0,0,0,0,0,0,0],"25":[1,0,0,0,0,0,0,0,0,0,0,0],"26":[1,0,0,0,0,0,0,0,0,0,0,0],"27":[1,0,0,0,0,0,0,0,0,0,0,0],"28":[1,0,0,0,0,0,0,0,0,0,0,0],"29":[1,0,0,0,0,0,0,0,0,0,0,0],"30":[1,0,0,0,0,0,0,0,0,0,0,0],"31":[1,0,0,0,0,0,0,0,0,0,0,0],"32":[1,0,0,0,0,0,0,0,0,0,0,0],"33":[1,0,0,0,0,0,0,0,0,0,0,0],"34":[1,0,0,0,0,0,0,0,0,0,0,0],"35":[1,0,0,0,0,0,0,0,0,0,0,0],"36":[1,0,0,0,0,0,0,0,0,0,0,0],"37":[1,0,0,0,0,0,0,0,0,0,0,0],"38":[1,0,0,0,0,0,0,0,0,0,0,0],"39":[1,0,0,0,0,0,0,0,0,0,0,0],"40":[1,0,0,0,0,0,0,0,0,0,0,0],"41":[1,0,0,0,0,0,0,0,0,0,0,0],"42":[1,0,0,0,0,0,0,0,0,0,0,0],"43":[1,0,0,0,0,0,0,0,0,0,0,0],"44":[1,0,0,0,0,0,0,0,0,0,0,0],"45":[1,0,0,0,0,0,0,0,0,0,0,0],"46":[1,0,0,0,0,0,0,0,0,0,0,0],"47":[1,0,0,0,0,0,0,0,0,0,0,0],"48":[1,0,0,0,0,0,0,0,0,0,0,0],"49":[1,0,0,0,0,0,0,0,0,0,0,0],"50":[1,0,0,0,0,0,0,0,0,0,0,0],"51":[1,0,0,0,0,0,0,0,0,0,0,0],"52":[1,0,0,0,0,0,0,0,0,0,0,0],"53":[1,0,0,0,0,0,0,0,0,0,0,0],"54":[1,0,0,0,0,0,0,0,0,0,0,0],"55":[1,0,0,0,0,0,0,0,0,0,0,0],"56":[1,0,0,0,0,0,0,0,0,0,0,0],"57":[1,0,0,0,0,0,0,0,0,0,0,0],"58":[1,0,0,0,0,0,0,0,0,0,0,0],"59":[1,0,0,0,0,0,0,0,0,0,0,0],"60":[1,0,0,0,0,0,0,0,0,0,0,0],"61":[1,0,0,0,0,0,0,0,0,0,0,0],"62":[1,0,0,0,0,0,0,0,0,0,0,0],"63":[1,0,0,0,0,0,0,0,0,0,0,0],"64":[1,0,0,0,0,0,0,0,0,0,0,0],"65":[1,0,0,0,0,0,0,0,0,0,0,0],"66":[1,0,0,0,0,0,0,0,0,0,0,0]}}
Loading

0 comments on commit a9f4845

Please sign in to comment.