-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfile_splitter.py
executable file
·53 lines (46 loc) · 1.41 KB
/
file_splitter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
import sys
import os
import fadvise
# Maximum number of bytes moved by a single read/write pair (1 MiB).
BUF_SIZE = 1024 * 1024


def copy(fin, fout, l):
    """Copy bytes from ``fin`` to ``fout`` in pieces of at most BUF_SIZE.

    Args:
        fin: Readable file-like object whose ``read(n)`` returns an empty
            value at end of file.
        fout: Writable file-like object.
        l: Number of bytes to copy when positive.  Zero or a negative
            value means "copy everything up to end of file".

    Returns:
        The number of bytes actually copied.  With a positive ``l`` the
        result is smaller than ``l`` when ``fin`` ends early.
    """
    until_eof = l <= 0
    copied = 0
    while until_eof or copied < l:
        want = BUF_SIZE if until_eof else min(l - copied, BUF_SIZE)
        buf = fin.read(want)
        # An empty read means end of file.  Testing truthiness (instead of
        # comparing with '') works for both str and bytes, and stopping here
        # in the bounded case avoids looping forever on a short input.
        if not buf:
            break
        fout.write(buf)
        copied += len(buf)
    return copied
def main():
    """Split a file into numbered chunks at byte offsets read from stdin.

    Command line: ``ORIG_FILE NEW_MODEL [START_CHUNK]``

    * ``ORIG_FILE``: the file to split.
    * ``NEW_MODEL``: a %-format pattern that receives the chunk number and
      yields the destination file name.
    * ``START_CHUNK``: optional, defaults to 0.  Chunks numbered below it
      are skipped by seeking over them instead of being written.

    Standard input supplies one absolute byte offset per line, in
    non-decreasing order; each offset ends the current chunk.  Blank lines
    are ignored.  Whatever follows the last offset is always written as a
    final chunk.

    Raises:
        ValueError: if an offset is not an integer or is smaller than the
            previous one.
    """
    if len(sys.argv) < 3:
        sys.exit('usage: %s ORIG_FILE NEW_MODEL [START_CHUNK] < offsets'
                 % sys.argv[0])
    orig_file = sys.argv[1]
    new_model = sys.argv[2]
    # Without a third argument every chunk is written; leaving the name
    # unbound here would raise NameError inside the loop below.
    start_chunk = int(sys.argv[3]) if len(sys.argv) > 3 else 0

    prev_pos = 0
    new_num = 0
    # Binary mode: chunk boundaries are byte offsets, so the data must not
    # go through any newline or text translation.
    with open(orig_file, 'rb') as forig:
        src_fd = forig.fileno()
        for line in sys.stdin:
            line = line.strip()
            if not line:
                continue
            pos = int(line)
            diff = pos - prev_pos
            if diff < 0:
                # Explicit check instead of assert, which vanishes under -O.
                raise ValueError('offset %d is smaller than previous offset %d'
                                 % (pos, prev_pos))
            if new_num >= start_chunk:
                with open(new_model % (new_num), 'wb') as fdest:
                    offset = forig.tell()
                    # Drop already-consumed pages from the cache.  Skipped at
                    # offset 0 because a length of 0 means "to end of file"
                    # and would cover the whole source.
                    if offset > 0:
                        fadvise.posix_fadvise(src_fd, 0, offset,
                                              fadvise.POSIX_FADV_DONTNEED)
                    fadvise.posix_fadvise(src_fd, offset, 0,
                                          fadvise.POSIX_FADV_SEQUENTIAL)
                    fadvise.posix_fadvise(fdest.fileno(), 0, 0,
                                          fadvise.POSIX_FADV_SEQUENTIAL)
                    # copy() treats a length of 0 as "until end of file", so
                    # an empty chunk must not call it; the empty destination
                    # file created by open() is the correct result.
                    if diff > 0:
                        copy(forig, fdest, diff)
            else:
                forig.seek(diff, os.SEEK_CUR)
            prev_pos = pos
            new_num += 1
        # Copy last chunk: everything after the final offset.
        with open(new_model % (new_num), 'wb') as fdest:
            copy(forig, fdest, 0)
# Script entry point: perform the split only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()