forked from tskit-dev/msprime
-
Notifications
You must be signed in to change notification settings - Fork 2
/
large-example.py
35 lines (28 loc) · 1019 Bytes
/
large-example.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
"""
Example in which we reproduce the simulations in the GQT paper,
"Efficient compression and analysis of large genetic variation datasets"
by Layer et al.
"""
from __future__ import print_function
from __future__ import division
import time
import msprime
def main():
    """
    Run one large msprime simulation, report how long it took and save it.

    The simulation parameters are fixed in the body. The resulting tree
    sequence is written to
    ``tmp__NOBACKUP__/large-example_<msprime version>.hdf5``.
    """
    # Function-scope import so this block does not depend on the file's
    # top-level import list.
    import os

    # time.clock() was removed in Python 3.8. Prefer perf_counter() and only
    # fall back to clock() on interpreters that predate it (the ``or`` is
    # short-circuited, so time.clock is never touched when perf_counter exists).
    timer = getattr(time, "perf_counter", None) or time.clock

    sample_size = 10**6
    output_dir = "tmp__NOBACKUP__"

    # Create the output directory *before* the expensive simulation so that a
    # missing directory cannot cost us the result at dump time.
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    before = timer()
    # Run the actual simulations
    tree_sequence = msprime.simulate(
        sample_size=sample_size,
        length=100 * 10**6,
        Ne=1e4,
        recombination_rate=1e-8,
        mutation_rate=1e-8,
        random_seed=1,  # Arbitrary - make this reproducible.
    )
    duration = timer() - before
    # Report the real sample size rather than a hard-coded figure that can
    # drift out of sync with the parameter above.
    print("Simulated {0} genomes in {1:.3f} seconds.".format(
        sample_size, duration))

    # Write the results to file, which is small and can be quickly reloaded
    # to avoid the cost of re-running the simulation. We can reload the
    # file in a few seconds using msprime.load(filename).
    filename = os.path.join(
        output_dir, "large-example_{}.hdf5".format(msprime.__version__))
    tree_sequence.dump(filename)
# Run the simulation only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()