Skip to content
This repository was archived by the owner on Mar 31, 2023. It is now read-only.

Commit 51c4a3a

Browse files
committed
all notebook generates the analysis results for the PGR-TK manuscript
1 parent f23d21c commit 51c4a3a

40 files changed

+45674
-0
lines changed

00-0-setup.ipynb

+71
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"id": "6da0917c",
7+
"metadata": {},
8+
"outputs": [],
9+
"source": [
10+
"!ln -sf /data/pgr-tk-HGRP-y1-evaluation-set-v0/* /data"
11+
]
12+
},
13+
{
14+
"cell_type": "code",
15+
"execution_count": 2,
16+
"id": "651cb4a8",
17+
"metadata": {},
18+
"outputs": [
19+
{
20+
"name": "stdout",
21+
"output_type": "stream",
22+
"text": [
23+
"Processing /code/pgrtk-0.3.4-cp38-cp38-linux_x86_64.whl\n",
24+
"Installing collected packages: pgrtk\n",
25+
"Successfully installed pgrtk-0.3.4\n"
26+
]
27+
}
28+
],
29+
"source": [
30+
"!pip install /code/pgrtk-0.3.4-cp38-cp38-linux_x86_64.whl "
31+
]
32+
},
33+
{
34+
"cell_type": "code",
35+
"execution_count": null,
36+
"id": "ef37b6f8",
37+
"metadata": {},
38+
"outputs": [],
39+
"source": []
40+
},
41+
{
42+
"cell_type": "code",
43+
"execution_count": null,
44+
"id": "2cd70928",
45+
"metadata": {},
46+
"outputs": [],
47+
"source": []
48+
}
49+
],
50+
"metadata": {
51+
"kernelspec": {
52+
"display_name": "Python 3",
53+
"language": "python",
54+
"name": "python3"
55+
},
56+
"language_info": {
57+
"codemirror_mode": {
58+
"name": "ipython",
59+
"version": 3
60+
},
61+
"file_extension": ".py",
62+
"mimetype": "text/x-python",
63+
"name": "python",
64+
"nbconvert_exporter": "python",
65+
"pygments_lexer": "ipython3",
66+
"version": "3.8.5"
67+
}
68+
},
69+
"nbformat": 4,
70+
"nbformat_minor": 5
71+
}

00-1-create_pgr_index.ipynb

+172
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"id": "7e41163d",
7+
"metadata": {},
8+
"outputs": [
9+
{
10+
"name": "stdout",
11+
"output_type": "stream",
12+
"text": [
13+
"pgr-mdb pgr-bin 0.3.2 (main:2045adc, release build, linux [x86_64] [rustc 1.62.0 (a8314ef7d 2022-06-27)])\n",
14+
"create pgr minimizer db\n",
15+
"\n",
16+
"USAGE:\n",
17+
" pgr-mdb [OPTIONS] <FILEPATH> <PREFIX>\n",
18+
"\n",
19+
"ARGS:\n",
20+
" <FILEPATH> \n",
21+
" <PREFIX> \n",
22+
"\n",
23+
"OPTIONS:\n",
24+
" -h, --help Print help information\n",
25+
" -k, --k <K> minimizer k-mer size [default: 56]\n",
26+
" -m, --min-span <MIN_SPAN> min span for neighboring minimiers [default: 64]\n",
27+
" -r, --r <R> sparse minimizer (shimmer) reduction factor [default: 4]\n",
28+
" -s, --sketch using sketch k-mer than minimizer\n",
29+
" -V, --version Print version information\n",
30+
" -w, --w <W> minimizer window size [default: 80]\n"
31+
]
32+
}
33+
],
34+
"source": [
35+
"%%bash\n",
36+
"/code/pgr-mdb --help"
37+
]
38+
},
39+
{
40+
"cell_type": "code",
41+
"execution_count": 2,
42+
"id": "6c938aa8",
43+
"metadata": {},
44+
"outputs": [
45+
{
46+
"name": "stdout",
47+
"output_type": "stream",
48+
"text": [
49+
"pgr-mdb pgr-bin 0.3.2 (main:2045adc, release build, linux [x86_64] [rustc 1.62.0 (a8314ef7d 2022-06-27)])\n"
50+
]
51+
}
52+
],
53+
"source": [
54+
"%%bash\n",
55+
"/code/pgr-mdb --version"
56+
]
57+
},
58+
{
59+
"cell_type": "markdown",
60+
"id": "14167273",
61+
"metadata": {},
62+
"source": [
63+
"Creating, and timing the creation of, the PGR-TK shimmer index. Note that the input file should contain a list of files in agc or fasta / fastq format.\n",
64+
"\n",
65+
"Example:\n",
66+
"```\n",
67+
"$ echo HPRC-y1-rebuild-04252022.agc > filelist\n",
68+
" \n",
69+
"# using pgr-mdb to create the index files, for 97 haplotyped genome assemblies from the HPRC year one release,\n",
70+
"# it takes about 30 to 40 min to create the index files\n",
71+
"\n",
72+
"$ pgr-mdb filelist HPRC-y1-rebuild-04252022\n",
73+
"\n",
74+
"# two index files will be created by the pgr-mdb command\n",
75+
"# one with a suffix .mdb and another one with a suffix .midx\n",
76+
"# when we use the load_from_agc_index() method, all three files, e.g., genomes.agc, genomes.mdb and\n",
77+
"# genomes.midx should have the same prefix as the parameter used to call load_from_agc_index() method\n",
78+
"```"
79+
]
80+
},
81+
{
82+
"cell_type": "code",
83+
"execution_count": 3,
84+
"id": "7af519d4",
85+
"metadata": {},
86+
"outputs": [
87+
{
88+
"name": "stderr",
89+
"output_type": "stream",
90+
"text": [
91+
"\tCommand being timed: \"/code/pgr-mdb /scratch/pgr-tk-HGRP-y1-evaluation-set-v0_input /scratch/pgr-tk-HGRP-y1-evaluation-set-v0\"\n",
92+
"\tUser time (seconds): 10812.93\n",
93+
"\tSystem time (seconds): 348.74\n",
94+
"\tPercent of CPU this job got: 1089%\n",
95+
"\tElapsed (wall clock) time (h:mm:ss or m:ss): 17:04.90\n",
96+
"\tAverage shared text size (kbytes): 0\n",
97+
"\tAverage unshared data size (kbytes): 0\n",
98+
"\tAverage stack size (kbytes): 0\n",
99+
"\tAverage total size (kbytes): 0\n",
100+
"\tMaximum resident set size (kbytes): 61238280\n",
101+
"\tAverage resident set size (kbytes): 0\n",
102+
"\tMajor (requiring I/O) page faults: 0\n",
103+
"\tMinor (reclaiming a frame) page faults: 173548943\n",
104+
"\tVoluntary context switches: 3628008\n",
105+
"\tInvoluntary context switches: 2636935\n",
106+
"\tSwaps: 0\n",
107+
"\tFile system inputs: 0\n",
108+
"\tFile system outputs: 30902112\n",
109+
"\tSocket messages sent: 0\n",
110+
"\tSocket messages received: 0\n",
111+
"\tSignals delivered: 0\n",
112+
"\tPage size (bytes): 4096\n",
113+
"\tExit status: 0\n"
114+
]
115+
}
116+
],
117+
"source": [
118+
"%%bash\n",
119+
"cd /scratch/\n",
120+
"find /scratch/ -name \"pgr-tk-HGRP-y1-evaluation-set-v0.agc\" > /scratch/pgr-tk-HGRP-y1-evaluation-set-v0_input\n",
121+
"\\time -v /code/pgr-mdb /scratch/pgr-tk-HGRP-y1-evaluation-set-v0_input /scratch/pgr-tk-HGRP-y1-evaluation-set-v0"
122+
]
123+
},
124+
{
125+
"cell_type": "code",
126+
"execution_count": 4,
127+
"id": "548fbc77",
128+
"metadata": {},
129+
"outputs": [
130+
{
131+
"name": "stdout",
132+
"output_type": "stream",
133+
"text": [
134+
"ls: cannot access '/scratch/HPRC-y1-pgr-tk-analysis-set*': No such file or directory\n"
135+
]
136+
}
137+
],
138+
"source": [
139+
"!ls -lh /scratch/HPRC-y1-pgr-tk-analysis-set*"
140+
]
141+
},
142+
{
143+
"cell_type": "code",
144+
"execution_count": null,
145+
"id": "b2f00a3c",
146+
"metadata": {},
147+
"outputs": [],
148+
"source": []
149+
}
150+
],
151+
"metadata": {
152+
"kernelspec": {
153+
"display_name": "Python 3",
154+
"language": "python",
155+
"name": "python3"
156+
},
157+
"language_info": {
158+
"codemirror_mode": {
159+
"name": "ipython",
160+
"version": 3
161+
},
162+
"file_extension": ".py",
163+
"mimetype": "text/x-python",
164+
"name": "python",
165+
"nbconvert_exporter": "python",
166+
"pygments_lexer": "ipython3",
167+
"version": "3.8.5"
168+
}
169+
},
170+
"nbformat": 4,
171+
"nbformat_minor": 5
172+
}

0 commit comments

Comments
 (0)