-
Notifications
You must be signed in to change notification settings - Fork 70
/
sdict.c
86 lines (79 loc) · 1.67 KB
/
sdict.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#include <string.h>
#include "sdict.h"
#include "khash.h"
// Instantiate a khash map named `str`: C-string keys mapped to uint32_t values.
// In this file the stored values are indices into the dictionary's seq array.
KHASH_MAP_INIT_STR(str, uint32_t)
// Shorthand for the generated table type; d->h is cast to this type wherever
// the table is used.
typedef khash_t(str) shash_t;
/*
 * Allocate an empty dictionary together with an empty name->index hash table.
 *
 * Returns the new zero-initialised dictionary, or NULL if either the
 * dictionary or its hash table cannot be allocated. Release the result with
 * sd_destroy().
 */
sdict_t *sd_init(void)
{
	sdict_t *d;
	d = (sdict_t*)calloc(1, sizeof(sdict_t));
	if (d == 0) return 0;
	d->h = kh_init(str);
	if (d->h == 0) { // never hand back a dictionary whose table is missing
		free(d);
		return 0;
	}
	return d;
}
/*
 * Release a dictionary: its hash table, every stored name, the sequence
 * array and finally the dictionary itself. A NULL dictionary is ignored.
 */
void sd_destroy(sdict_t *d)
{
	uint32_t idx;
	if (!d) return;
	// the table's keys point at the name strings, so drop the table first
	if (d->h != 0)
		kh_destroy(str, (shash_t*)d->h);
	idx = 0;
	while (idx < d->n_seq) {
		free(d->seq[idx].name);
		++idx;
	}
	free(d->seq);
	free(d);
}
/*
 * Add a sequence name to the dictionary, or look it up if already present.
 *
 * d     dictionary; its hash table (d->h) must exist
 * name  NUL-terminated sequence name; duplicated when a new entry is created
 * len   length recorded for a newly created entry (not applied when the name
 *       is already present)
 *
 * Returns the index of the sequence in d->seq, or -1 if memory ran out, in
 * which case no entry is added.
 */
int32_t sd_put(sdict_t *d, const char *name, uint32_t len)
{
	shash_t *h = (shash_t*)d->h;
	khint_t k;
	int absent;
	k = kh_put(str, h, name, &absent);
	if (absent < 0) return -1; // table could not grow; k is not a usable bucket
	if (absent) {
		sd_seq_t *s;
		char *copy;
		if (d->n_seq == d->m_seq) {
			sd_seq_t *grown;
			d->m_seq = d->m_seq? d->m_seq<<1 : 16;
			grown = (sd_seq_t*)realloc(d->seq, d->m_seq * sizeof(sd_seq_t));
			if (grown == 0) {
				d->m_seq = d->n_seq; // the array was full, so this restores the old capacity
				kh_del(str, h, k);   // undo the insertion; the key still points at the caller's string
				return -1;
			}
			d->seq = grown;
		}
		copy = strdup(name);
		if (copy == 0) {
			kh_del(str, h, k);
			return -1;
		}
		s = &d->seq[d->n_seq];
		s->len = len, s->aux = 0, s->del = 0;
		// the table key and the array entry share the same private copy of the name
		kh_key(h, k) = s->name = copy;
		kh_val(h, k) = d->n_seq++;
	} // TODO: test if len is the same;
	return kh_val(h, k);
}
/*
 * Look up a sequence by name.
 *
 * Returns the value stored for `name` in the hash table (the index written
 * by sd_put() or sd_hash()), or -1 when the name is not present.
 */
int32_t sd_get(const sdict_t *d, const char *name)
{
	shash_t *tbl = (shash_t*)d->h;
	khint_t it = kh_get(str, tbl, name);
	if (it == kh_end(tbl))
		return -1;
	return kh_val(tbl, it);
}
/*
 * Build the name->index hash table from the current sequence array.
 *
 * Does nothing when a table already exists. Otherwise every d->seq[i].name is
 * inserted with value i; the table borrows the name pointers rather than
 * copying them. If the table cannot be allocated or grown it is discarded and
 * d->h is left NULL, which is how a caller can detect the failure.
 */
void sd_hash(sdict_t *d)
{
	uint32_t i;
	shash_t *h;
	if (d->h) return;
	h = kh_init(str);
	if (h == 0) return; // d->h stays NULL
	for (i = 0; i < d->n_seq; ++i) {
		int absent;
		khint_t k;
		k = kh_put(str, h, d->seq[i].name, &absent);
		if (absent < 0) { // insertion failed: k must not be used as a bucket
			kh_destroy(str, h);
			return;
		}
		kh_val(h, k) = i;
	}
	d->h = h; // publish only a fully built table
}
/*
 * Remove every sequence whose `del` flag is set, compact the remaining
 * entries to the front of d->seq, and rebuild the hash table.
 *
 * Returns a calloc()ed array with one entry per original sequence: the new
 * index of a kept sequence, or -1 for a removed one. The dictionary keeps no
 * reference to this array, so the caller owns it. Returns NULL, leaving the
 * dictionary untouched, if the array cannot be allocated. For an empty
 * dictionary the result is whatever calloc(0, ...) yields, which may also be
 * NULL.
 */
int32_t *sd_squeeze(sdict_t *d)
{
	int32_t *map;
	uint32_t i, j;
	// allocate before touching the dictionary so a failure changes nothing
	map = (int32_t*)calloc(d->n_seq, sizeof(int32_t));
	if (map == 0 && d->n_seq > 0) return 0;
	// indices are about to change, so the old table is dropped and rebuilt below
	if (d->h) {
		kh_destroy(str, (shash_t*)d->h);
		d->h = 0;
	}
	for (i = j = 0; i < d->n_seq; ++i) {
		if (d->seq[i].del) {
			free(d->seq[i].name);
			map[i] = -1;
		} else {
			d->seq[j] = d->seq[i];
			map[i] = (int32_t)j++;
		}
	}
	d->n_seq = j;
	sd_hash(d);
	return map;
}