Skip to content

Commit

Permalink
v1.2.1
Browse files Browse the repository at this point in the history
  • Loading branch information
zhangrengang committed Oct 11, 2019
1 parent 850b869 commit 8555fdb
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 4 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,8 @@ mafft --auto rice6.9.5.liban.rexdb.cls.pep.INT_TPase.faa > rice6.9.5.liban.rexdb
```
Note: the domain names differ between rexdb and gydb: PROT (rexdb) = AP (gydb), RH (rexdb) = RNaseH (gydb). You should use the actual domain name.

### extracting TE sequences from genome for TEsorter ###
Here are examples to extract TE sequences from outputs of wide-used softwares.
### Extracting TE sequences from genome for TEsorter ###
Here are examples of extracting TE sequences from the outputs of widely used software, for when you have only genome sequences.

1. extract all TE sequences from [RepeatMasker](http://www.repeatmasker.org/RMDownload.html) output:
```
Expand Down
3 changes: 2 additions & 1 deletion bin/LTR_retriever.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,8 @@ def get_full_seqs(self, fout=sys.stdout):
d_seqs = seq2dict(self.genome)
for rc in self.intact_list():
ltr_seq = d_seqs[rc.chr].seq[rc.start-1:rc.end]
print >> fout, '>{}\n{}'.format(rc.LTR_loc, ltr_seq)
ltr_cls = '{}/{}'.format(rc.TE_type, rc.SuperFamily)
print >> fout, '>{}#{}\n{}'.format(rc.LTR_loc, ltr_cls, ltr_seq)
def re_scn(self): # remove redundant
idmap = self.seqIdmap
lrt_set = set([])
Expand Down
5 changes: 4 additions & 1 deletion bin/RepeatMasker.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ def __init__(self, line):
convert = [int, float, float, float,
str, int, int, str, str,
str, str, str, int, str, str ]
assert len(temp) == len(title) or (len(temp)-1 == len(title) and temp[-1] == "*")
assert len(temp) == len(title) or (len(temp)-1 == len(title) and temp[-1] == "*") or (len(temp) == len(title)-1)
# try: assert len(temp) == len(title) or (len(temp)-1 == len(title) and temp[-1] == "*")
# except AssertionError: print >> sys.stderr, temp, '\n', title
self.__dict__ = {key: func(value) for key,value,func in zip(title, temp, convert)}
self.query_left = int(self.query_left.strip('()'))
self.repeat_begin = int(self.repeat_begin.strip('()'))
Expand Down Expand Up @@ -70,6 +72,7 @@ def get_seq(self, seqRecord):
id = '{}:{}..{}|{}#{}'.format(self.query_id, self.query_begin, self.query_end, self.repeat_family, self.super_class)
teRecord = seqRecord[self.query_begin-1:self.query_end]
teRecord.id = id
teRecord.description = id
return teRecord

class RMOutParser():
Expand Down

0 comments on commit 8555fdb

Please sign in to comment.