-
Notifications
You must be signed in to change notification settings - Fork 13
/
indexdir.lua
77 lines (68 loc) · 3.05 KB
/
indexdir.lua
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
-- Ref.: A. https://github.com/facebook/fbcunn/blob/master/examples/imagenet/dataset.lua
-- e.g. fileList = paths.indexdir("/path/to/files/", 'png', true)
--
-- Index one or more directories by running "find" on them and packing the
-- resulting file paths into a 2D torch.CharTensor (one NUL-padded row per path).
--
-- Parameters:
--   pathList      : directory path (string) or table of directory paths.
--   extensionList : extension (string) or table of extensions to match,
--                   case-insensitively (find -iname). Defaults to a list of
--                   common image extensions.
--   use_cache     : when truthy and a listing file for the same arguments
--                   already exists in the temp directory, reuse it instead of
--                   running "find" again.
--   ignorePattern : optional "find -iname" pattern; matching files are skipped.
--
-- Returns a table with fields
--   tensor    : the CharTensor of paths,
--   cachefile : path of the listing file written by "find",
-- and methods
--   obj:filename(i) : the i-th path as a Lua string,
--   obj:size()      : number of indexed paths.
--
-- Raises an error when no file is found or extensionList is empty.
function paths.indexdir(pathList, extensionList, use_cache, ignorePattern)
   -- the original code relies on `ffi` being in scope; fall back to require
   local ffi = ffi or require('ffi')

   extensionList = extensionList or {'jpg', 'png','JPG','PNG','JPEG', 'ppm', 'PPM', 'bmp', 'BMP'}
   if torch.type(extensionList) == 'string' then
      extensionList = {extensionList}
   end
   assert(#extensionList > 0, "extensionList must contain at least one extension")
   if torch.type(pathList) == 'string' then
      pathList = {pathList}
   end

   -- define command-line tools, try your best to maintain OSX compatibility
   local find = 'find'
   if jit and jit.os == 'OSX' then
      find = 'gfind'
   end

   -- the arguments make the cache file unique; ignorePattern is part of the
   -- key so that a cached listing is never reused with a different pattern
   -- (the key is unchanged when no ignorePattern is given)
   local unique = table.concat(pathList)..table.concat(extensionList)
   if ignorePattern then
      unique = unique .. '!' .. ignorePattern
   end

   -- os.tmpname() is only used to discover the temp directory; it may create
   -- the file it names, so remove it to avoid leaving one behind per call
   local tmpName = os.tmpname()
   local tmpDir = paths.dirname(tmpName)
   os.remove(tmpName)

   -- use hash to make string shorter
   local findFile = paths.concat(tmpDir, torch.md5.sumhexa(unique))

   if not (use_cache and paths.filep(findFile)) then
      -- "find" output is appended below, so start from a clean file
      if paths.filep(findFile) then
         os.remove(findFile)
      end

      -- Options for the GNU find command
      local clauses = {}
      for i, extension in ipairs(extensionList) do
         clauses[i] = '-iname "*.' .. extension .. '"'
      end
      local findOptions = ' ' .. table.concat(clauses, ' -o ')
      if ignorePattern then
         findOptions = '! -iname "'..ignorePattern..'" \\(' .. findOptions .. " \\)"
      end

      -- run "find" on each directory, and concatenate all
      -- those filenames into a single file containing all file paths
      for _, path in ipairs(pathList) do
         local command = find .. ' "' .. path .. '" ' .. findOptions .. ' >> "' .. findFile .. '"'
         os.execute(command)
      end
   end

   -- first pass: count the paths and find the longest one, in BYTES.
   -- ffi.copy below writes #line + 1 bytes per row, so the row width must be
   -- based on byte length (not on display width).
   local length, maxPathLength = 0, 0
   local listing = io.open(findFile, 'r')
   if listing then
      for line in listing:lines() do
         length = length + 1
         if #line > maxPathLength then
            maxPathLength = #line
         end
      end
      listing:close()
   end
   maxPathLength = maxPathLength + 1 -- room for the terminating NUL

   assert(length > 0, "Could not find any files in the given input paths")
   assert(maxPathLength > 0, "paths of files are length 0?")

   -- second pass: load the large concatenated list of file paths to fileList
   local fileList = torch.CharTensor() -- path to each image in dataset
   fileList:resize(length, maxPathLength):fill(0)
   local s_data = fileList:data()
   for line in io.lines(findFile) do
      ffi.copy(s_data, line) -- copies the string plus a NUL terminator
      s_data = s_data + maxPathLength
   end

   local obj = {tensor=fileList,cachefile=findFile}

   -- i-th indexed path as a Lua string (row is read up to its first NUL)
   function obj:filename(i)
      return ffi.string(torch.data(self.tensor[i]))
   end

   -- number of indexed paths
   function obj:size()
      return self.tensor:size(1)
   end

   return obj
end