-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathflow_dataframe.py
33 lines (31 loc) · 1.61 KB
/
flow_dataframe.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import numpy as np
import pandas as pd
import os
def flow_from_dataframe(img_data_gen, in_df, path_col, y_col, **dflow_args):
    """
    Creates a DirectoryIterator whose samples are taken from in_df instead of from a directory scan.

    The iterator is first built by ``img_data_gen.flow_from_directory`` on the directory of the first path
    in ``in_df[path_col]`` (with ``class_mode='sparse'``). Its file list, labels and sample counts are then
    overwritten with the contents of the DataFrame, so whatever the directory scan reported is discarded.

    :param img_data_gen: an ImageDataGenerator (any object exposing ``flow_from_directory``)
    :param in_df: a DataFrame with one row per image
    :param path_col: name of column in in_df holding the full path of each image
    :param y_col: name of column in in_df for y values/labels
    :param dflow_args: additional arguments to flow_from_directory
    :return: df_gen (keras.preprocessing.image.DirectoryIterator)
    :raises IndexError: if in_df has no rows (there is no first path to derive the directory from)
    """
    print('flow_from_dataframe() -->')
    image_paths = in_df[path_col].values
    if len(image_paths) == 0:
        # Same exception type the bare ``values[0]`` lookup would raise, but with an actionable message.
        raise IndexError('in_df is empty: at least one row is required to locate the image directory')

    # The directory is only needed so that flow_from_directory can construct an iterator at all.
    base_dir = os.path.dirname(image_paths[0])
    print('## Ignore next message from keras, values are replaced anyways')
    # flow_from_directory: Takes the path to a directory, and generates batches of augmented/normalized data.
    # sparse: a 1D integer label array is returned
    df_gen = img_data_gen.flow_from_directory(base_dir, class_mode='sparse', **dflow_args)

    # df_gen: A DirectoryIterator yielding tuples of (x, y) where x is a numpy array containing a batch of images
    # with shape (batch_size, *target_size, channels) and y is a numpy array of corresponding labels.
    # Replace what the directory scan found with the DataFrame's own paths and labels.
    df_gen.filenames = image_paths
    df_gen.classes = np.stack(in_df[y_col].values)
    df_gen.samples = in_df.shape[0]
    df_gen.n = in_df.shape[0]
    # Rebuild the sampling index so it matches the new sample count.
    df_gen._set_index_array()

    # filenames already hold the full path of every image. The iterator is expected to prefix `directory`
    # onto each filename when loading (NOTE(review): true for Keras versions that use
    # os.path.join(self.directory, fname) - confirm for the installed version). An empty prefix leaves the
    # paths untouched, whereas base_dir would be duplicated in front of relative paths.
    df_gen.directory = ''
    print('Reinserting dataframe: {} images'.format(in_df.shape[0]))
    print('flow_from_dataframe() <--')
    return df_gen