-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathB2_ReadLabel.py
43 lines (30 loc) · 1.17 KB
/
B2_ReadLabel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import os
import multiprocessing as mp
from tqdm import tqdm
def ReadItem(item=None, labelPath=None):
    """Compute simple line and word statistics for one text file.

    Args:
        item: File name, relative to ``labelPath``.
        labelPath: Directory that contains ``item``.

    Returns:
        A tuple ``(lineCount, wordCount, wordsPerLine)``. Only non-empty
        lines are counted, and words are the non-empty tokens obtained by
        splitting each line on single spaces. Returns ``None`` when either
        argument is missing or the path is not a regular file. A file with
        no non-empty lines yields ``(0, 0, 0.0)``.
    """
    if not item or not labelPath:
        return None

    itemPath = os.path.join(labelPath, item)
    # isfile rather than exists: a sub-directory inside the label folder
    # would otherwise pass the check and crash in open().
    if not os.path.isfile(itemPath):
        return None

    with open(itemPath, 'r') as file:
        content = file.read()

    countLines = 0
    countWords = 0
    for line in content.split('\n'):
        if line == '':
            continue
        countLines += 1
        # Split on single spaces only (tabs are not separators); the empty
        # tokens produced by consecutive spaces are ignored.
        countWords += sum(1 for word in line.split(' ') if word)

    # Guard the average: an empty file has no lines to divide by.
    if countLines == 0:
        return (0, 0, 0.0)
    return (countLines, countWords, countWords / countLines)
def ReadLabel(label=None, newFolder='dataset', multi=False):
    """Collect per-file statistics for every entry of one label folder.

    The folder inspected is ``<cwd>/<newFolder>/<label>``.

    Args:
        label: Name of the label sub-folder. Also used as the progress-bar
            description when ``multi`` is true.
        newFolder: Folder that contains the label sub-folders.
        multi: When true, process the entries in a multiprocessing pool.

    Returns:
        ``[results, label]`` where ``results`` holds one ``ReadItem`` result
        per directory entry, in ``os.listdir`` order. Returns ``None`` when
        ``label`` is empty or the label folder does not exist.
    """
    # Validate label before building the path: os.path.join raises
    # TypeError on None, which made the original guard unreachable.
    if not label:
        return None
    labelPath = os.path.join(os.getcwd(), newFolder, label)
    if not os.path.isdir(labelPath):
        return None

    items = os.listdir(labelPath)
    if multi:
        # Leave one core free, but never ask for zero workers
        # (mp.Pool(0) raises ValueError on single-core hosts).
        workers = max(1, mp.cpu_count() - 1)
        # The context manager releases the pool even if starmap raises.
        with mp.Pool(workers) as pool:
            # NOTE: the tqdm bar tracks construction of the argument list,
            # not completion of the worker tasks.
            results = pool.starmap(
                ReadItem,
                [(item, labelPath) for item in tqdm(items, label)],
            )
    else:
        results = [ReadItem(item, labelPath) for item in items]
    return [results, label]