-
Notifications
You must be signed in to change notification settings - Fork 0
/
.Rhistory
122 lines (122 loc) · 4.23 KB
/
.Rhistory
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
library(reticulate)
reticulate::repl_python()
import os
no
import os
import glob
import re as re
from datetime import timedelta
import pandas as pd
def import_snapshots(snapshotdir, camera='vis'):
    """Build a metadata table from the snapshot image filenames in a directory.

    Every ``*.png`` file directly inside ``snapshotdir`` is expected to be
    named ``<plantbarcode>-<experiment>-<timestamp>-<cameralabel>-<frameid>.png``,
    for example ``C6-GoldStandard_PSII-20190312T000911-PSII0-15.png``.

    Input:
        snapshotdir = directory of .png files
        camera      = the camera which captured the images. 'vis' or 'psii'
                      (compared case-insensitively)

    Returns:
        pandas.DataFrame indexed by (plantbarcode, experiment, datetime,
        jobdate) with columns cameralabel, frameid (uint8) and filename,
        sorted by plantbarcode, datetime and frameid.

    Raises:
        ValueError: if a filename does not split into exactly five
            '-'-separated fields.
    """
    fields = ['plantbarcode', 'experiment', 'timestamp', 'cameralabel',
              'frameid']

    # %% Get metadata from the .png filenames
    rows = []
    for fn in sorted(glob.glob(os.path.join(snapshotdir, '*.png'))):
        parts = os.path.splitext(os.path.basename(fn))[0].split('-')
        if len(parts) != len(fields):
            # Fail with the offending file instead of an opaque pandas
            # column-count error.
            raise ValueError(
                'cannot parse snapshot filename %r: expected %d '
                "'-'-separated fields (%s), found %d" %
                (fn, len(fields), ', '.join(fields), len(parts)))
        rows.append(parts + [fn])

    fdf = pd.DataFrame(rows, columns=fields + ['filename'])

    # convert date and time columns to datetime format
    fdf['datetime'] = pd.to_datetime(fdf['timestamp'])
    fdf['jobdate'] = fdf['datetime'].dt.floor('D')

    if camera.upper() == 'PSII':
        # create a jobdate to match dark and light measurements. dark
        # experiments after 8PM correspond to the next day's light experiments
        late = fdf['datetime'].dt.hour >= 20
        fdf.loc[late, 'jobdate'] = fdf.loc[late, 'jobdate'] + timedelta(days=1)

    # convert image id from string to integer that can be sorted numerically
    # (uint8: frame ids above 255 do not fit)
    fdf['frameid'] = fdf['frameid'].astype('uint8')
    fdf = fdf.sort_values(['plantbarcode', 'datetime', 'frameid'])

    fdf = fdf.set_index(['plantbarcode', 'experiment', 'datetime',
                         'jobdate']).drop(columns=['timestamp'])
    return fdf
def import_snapshots(snapshotdir, camera='vis'):
    """Build a metadata table from the snapshot image filenames in a directory.

    Every ``*.png`` file directly inside ``snapshotdir`` is expected to be
    named ``<plantbarcode>-<experiment>-<timestamp>-<cameralabel>-<frameid>.png``,
    for example ``C6-GoldStandard_PSII-20190312T000911-PSII0-15.png``.

    Input:
        snapshotdir = directory of .png files
        camera      = the camera which captured the images. 'vis' or 'psii'
                      (compared case-insensitively)

    Returns:
        pandas.DataFrame indexed by (plantbarcode, experiment, datetime,
        jobdate) with columns cameralabel, frameid (uint8) and filename,
        sorted by plantbarcode, datetime and frameid.

    Raises:
        ValueError: if a filename does not split into exactly five
            '-'-separated fields.
    """
    fields = ['plantbarcode', 'experiment', 'timestamp', 'cameralabel',
              'frameid']

    # %% Get metadata from the .png filenames
    rows = []
    for fn in sorted(glob.glob(os.path.join(snapshotdir, '*.png'))):
        parts = os.path.splitext(os.path.basename(fn))[0].split('-')
        if len(parts) != len(fields):
            # Fail with the offending file instead of an opaque pandas
            # column-count error.
            raise ValueError(
                'cannot parse snapshot filename %r: expected %d '
                "'-'-separated fields (%s), found %d" %
                (fn, len(fields), ', '.join(fields), len(parts)))
        rows.append(parts + [fn])

    fdf = pd.DataFrame(rows, columns=fields + ['filename'])

    # convert date and time columns to datetime format
    fdf['datetime'] = pd.to_datetime(fdf['timestamp'])
    fdf['jobdate'] = fdf['datetime'].dt.floor('D')

    if camera.upper() == 'PSII':
        # create a jobdate to match dark and light measurements. dark
        # experiments after 8PM correspond to the next day's light experiments
        late = fdf['datetime'].dt.hour >= 20
        fdf.loc[late, 'jobdate'] = fdf.loc[late, 'jobdate'] + timedelta(days=1)

    # convert image id from string to integer that can be sorted numerically
    # (uint8: frame ids above 255 do not fit)
    fdf['frameid'] = fdf['frameid'].astype('uint8')
    fdf = fdf.sort_values(['plantbarcode', 'datetime', 'frameid'])

    fdf = fdf.set_index(['plantbarcode', 'experiment', 'datetime',
                         'jobdate']).drop(columns=['timestamp'])
    return fdf
# Interactive console history: entries alternate between the R prompt and the
# Python REPL opened by reticulate::repl_python(). Not a runnable script.
# NOTE(review): `quit` here presumably leaves the Python REPL - confirm.
quit
# Source the packaged import_snapshots.py and call it on the current directory.
reticulate::source_python('~/cppcpyutils/src/cppcpyutils/data/import_snapshots.py')
import_snapshots('.')
restart()
library(reticulate)
reticulate::repl_python()
import os
exit()
# NOTE(review): `quit9)` looks like a typo for `quit()`; it has an unbalanced parenthesis.
quit9)
quit()
# Select the "r-nlp" conda environment (entered twice in the history).
use_condaenv(condaenv = "r-nlp", conda = "/opt/anaconda3/bin/conda")
use_condaenv(condaenv = "r-nlp", conda = "/opt/anaconda3/bin/conda")
library(reticulate)
quit()
exit()
SystemExit
SystemExit()
# Switch to the "plantcv" conda environment, first unqualified and then via the reticulate:: namespace.
use_condaenv(condaenv = "plantcv")
reticulate::use_condaenv(condaenv = "plantcv")
library(reticulate)
import os
py_discover_config()
reticulate::repl_python()
import os
from plantcv import plantcv as pcv
import os
import glob
import re as re
from datetime import timedelta
import pandas as pd
def import_snapshots(snapshotdir, camera='vis'):
    """Build a metadata table from the snapshot image filenames in a directory.

    Every ``*.png`` file directly inside ``snapshotdir`` is expected to be
    named ``<plantbarcode>-<experiment>-<timestamp>-<cameralabel>-<frameid>.png``,
    for example ``C6-GoldStandard_PSII-20190312T000911-PSII0-15.png``.

    Input:
        snapshotdir = directory of .png files
        camera      = the camera which captured the images. 'vis' or 'psii'
                      (compared case-insensitively)

    Returns:
        pandas.DataFrame indexed by (plantbarcode, experiment, datetime,
        jobdate) with columns cameralabel, frameid (uint8) and filename,
        sorted by plantbarcode, datetime and frameid.

    Raises:
        ValueError: if a filename does not split into exactly five
            '-'-separated fields.
    """
    fields = ['plantbarcode', 'experiment', 'timestamp', 'cameralabel',
              'frameid']

    # %% Get metadata from the .png filenames
    rows = []
    for fn in sorted(glob.glob(os.path.join(snapshotdir, '*.png'))):
        parts = os.path.splitext(os.path.basename(fn))[0].split('-')
        if len(parts) != len(fields):
            # Fail with the offending file instead of an opaque pandas
            # column-count error.
            raise ValueError(
                'cannot parse snapshot filename %r: expected %d '
                "'-'-separated fields (%s), found %d" %
                (fn, len(fields), ', '.join(fields), len(parts)))
        rows.append(parts + [fn])

    fdf = pd.DataFrame(rows, columns=fields + ['filename'])

    # convert date and time columns to datetime format
    fdf['datetime'] = pd.to_datetime(fdf['timestamp'])
    fdf['jobdate'] = fdf['datetime'].dt.floor('D')

    if camera.upper() == 'PSII':
        # create a jobdate to match dark and light measurements. dark
        # experiments after 8PM correspond to the next day's light experiments
        late = fdf['datetime'].dt.hour >= 20
        fdf.loc[late, 'jobdate'] = fdf.loc[late, 'jobdate'] + timedelta(days=1)

    # convert image id from string to integer that can be sorted numerically
    # (uint8: frame ids above 255 do not fit)
    fdf['frameid'] = fdf['frameid'].astype('uint8')
    fdf = fdf.sort_values(['plantbarcode', 'datetime', 'frameid'])

    # NOTE(review): duplicate jobs of the same sample on the same day are not
    # detected here; callers should check the index for duplicates if needed.
    fdf = fdf.set_index(['plantbarcode', 'experiment', 'datetime',
                         'jobdate']).drop(columns=['timestamp'])
    return fdf