Skip to content

Commit

Permalink
fix queue
Browse files Browse the repository at this point in the history
  • Loading branch information
rwood-97 committed Feb 5, 2024
1 parent 8ba4cb3 commit cf91abb
Showing 1 changed file with 103 additions and 110 deletions.
213 changes: 103 additions & 110 deletions mapreader/annotate/annotator.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ def __init__(
raise ValueError(
"[ERROR] ``patch_df`` must be a path to a csv or a pandas DataFrame."
)
self._eval_df(patch_df) # eval tuples/lists in df
patch_df = self._eval_df(patch_df) # eval tuples/lists in df

if parent_df is not None:
if isinstance(parent_df, str):
Expand All @@ -150,7 +150,7 @@ def __init__(
raise ValueError(
"[ERROR] ``parent_df`` must be a path to a csv or a pandas DataFrame."
)
self._eval_df(parent_df) # eval tuples/lists in df
parent_df = self._eval_df(parent_df) # eval tuples/lists in df

if patch_df is None:
# If we don't get patch data provided, we'll use the patches and parents to create the dataframes
Expand Down Expand Up @@ -183,7 +183,6 @@ def __init__(
# Add label column if not present
if label_col not in patch_df.columns:
patch_df[label_col] = None
patch_df["changed"] = False

# Check for image paths column
if patch_paths_col not in patch_df.columns:
Expand Down Expand Up @@ -214,47 +213,15 @@ def __init__(
# Ensure unique values in list
labels = sorted(set(labels), key=labels.index)

# Test for existing file
# Test for existing annotation file
if os.path.exists(annotations_file):
print(f"[INFO] Loading existing annotations for {username}.")
existing_annotations = pd.read_csv(
annotations_file, index_col=0, sep=delimiter
)

if label_col not in existing_annotations.columns:
raise ValueError(
f"[ERROR] Your existing annotations do not have the label column: {label_col}."
)

print(existing_annotations[label_col].dtype)

if existing_annotations[label_col].dtype == int:
# convert label indices (ints) to labels (strings)
# this is to convert old annotations format to new annotations format
existing_annotations[label_col] = existing_annotations[label_col].apply(
lambda x: labels[x]
)

patch_df = patch_df.join(
existing_annotations, how="left", lsuffix="_x", rsuffix="_y"
)
patch_df[label_col] = patch_df["label_y"].fillna(patch_df[f"{label_col}_x"])
patch_df = patch_df.drop(
columns=[
f"{label_col}_x",
f"{label_col}_y",
]
)
patch_df["changed"] = patch_df[label_col].apply(
lambda x: True if x else False
)

patch_df[patch_paths_col] = patch_df[f"{patch_paths_col}_x"]
patch_df = patch_df.drop(
columns=[
f"{patch_paths_col}_x",
f"{patch_paths_col}_y",
]
print("[INFO] Loading existing patch annotations.")
patch_df = self._load_annotations(
patch_df=patch_df,
annotations_file=annotations_file,
labels=labels,
label_col=label_col,
delimiter=delimiter,
)

# initiate as a DataFrame
Expand Down Expand Up @@ -288,8 +255,6 @@ def __init__(
self._min_values = min_values or {}
self._max_values = max_values or {}

self.patch_width, self.patch_height = self.get_patch_size()

# Create annotations_dir
Path(annotations_dir).mkdir(parents=True, exist_ok=True)

Expand Down Expand Up @@ -324,7 +289,7 @@ def __init__(
self._setup_box()

# Setup queue
self._queue = self.get_queue()
self._queue = []

@staticmethod
def _load_dataframes(
Expand Down Expand Up @@ -373,32 +338,61 @@ def _load_dataframes(

return parent_df, patch_df

def _eval_df(self, df):
@staticmethod
def _eval_df(df):
for col in df.columns:
try:
df[col] = df[col].apply(literal_eval)
except (ValueError, TypeError, SyntaxError):
pass
return df

def get_patch_size(self):
"""
Calculate and return the width and height of the patches based on the
first patch of the DataFrame, assuming the same shape of patches
across the frame.
@staticmethod
def _load_annotations(
patch_df: pd.DataFrame,
annotations_file: str,
labels: list,
label_col: str,
delimiter: str,
):
"""Load existing annotations from file.
Parameters
----------
patch_df : pd.DataFrame
Current patch dataframe.
annotations_file : str
Name of the annotations file
labels : list
List of labels for annotation.
label_col : str
Name of the column in which labels are stored in annotations file
delimiter : str
Delimiter used in CSV files
Returns
-------
Tuple[int, int]
Width and height of the patches.
"""
patch_width = (
self.sort_values("min_x").max_x[0] - self.sort_values("min_x").min_x[0]
)
patch_height = (
self.sort_values("min_y").max_y[0] - self.sort_values("min_y").min_y[0]
existing_annotations = pd.read_csv(annotations_file, index_col=0, sep=delimiter)

if label_col not in existing_annotations.columns:
raise ValueError(
f"[ERROR] Your existing annotations do not have the label column: {label_col}."
)

if existing_annotations[label_col].dtype == int:
# convert label indices (ints) to labels (strings)
# this is to convert old annotations format to new annotations format
existing_annotations[label_col] = existing_annotations[label_col].apply(
lambda x: labels[x]
)

patch_df = patch_df.join(
existing_annotations[label_col], how="left", rsuffix="_existing"
)
if f"{label_col}_existing" in patch_df.columns:
patch_df[label_col].fillna(patch_df[f"{label_col}_existing"], inplace=True)
patch_df.drop(columns=f"{label_col}_existing", inplace=True)

return patch_width, patch_height
return patch_df

def _setup_buttons(self) -> None:
"""
Expand Down Expand Up @@ -450,7 +444,7 @@ def get_queue(
self, as_type: str | None = "list"
) -> list[int] | (pd.Index | pd.Series):
"""
Gets the indices of rows which are legible for annotation.
Gets the indices of rows which are eligible for annotation.
Parameters
----------
Expand All @@ -466,8 +460,8 @@ def get_queue(
pd.Index object, or a pd.Series of eligible rows.
"""

def check_legibility(row):
if row.label is not None:
def check_eligibility(row):
if row.label not in [np.NaN, None]:
return False

test = [
Expand All @@ -479,18 +473,17 @@ def check_legibility(row):

return True

test = self.copy()
test["eligible"] = test.apply(check_legibility, axis=1)
test = test[
["eligible"] + [col for col in test.columns if not col == "eligible"]
]
queue_df = self.copy(deep=True)
queue_df = queue_df[queue_df[self.label_col].isna()] # only unlabelled
queue_df["eligible"] = queue_df.apply(check_eligibility, axis=1)
queue_df = queue_df[queue_df.eligible].sample(frac=1) # shuffle

indices = test[test.eligible].index
indices = queue_df.index
if as_type == "list":
return list(indices)
if as_type == "index":
return indices
return test[test.eligible]
return queue_df

def get_context(self):
"""
Expand All @@ -514,9 +507,16 @@ def get_path(image_path, dim=True):
im = Image.fromarray(im_array.astype(np.uint8))
return im

def get_empty_square():
def get_empty_square(patch_size: tuple[int, int]):
"""Generates an empty square image.
Parameters
----------
patch_size : tuple[int, int]
Patch size in pixels as tuple of `(width, height)`.
"""
im = Image.new(
size=(self.patch_width, self.patch_height),
size=patch_size,
mode="RGB",
color="white",
)
Expand All @@ -531,17 +531,26 @@ def get_empty_square():

ix = self._queue[self.current_index]

x = self.at[ix, "min_x"]
y = self.at[ix, "min_y"]
current_parent = self.at[ix, "parent_id"]
min_x = self.at[ix, "min_x"]
min_y = self.at[ix, "min_y"]

# cannot assume all patches are same size
try:
height, width, _ = self.at[ix, "shape"]
except KeyError:
im_path = self.at[ix, self.patch_paths_col]
im = Image.open(im_path)
height = im.height
width = im.width

current_parent = self.at[ix, "parent_id"]
parent_frame = self.query(f"parent_id=='{current_parent}'")

deltas = list(range(-self.surrounding, self.surrounding + 1))
y_and_x = list(
product(
[y + y_delta * self.patch_height for y_delta in deltas],
[x + x_delta * self.patch_width for x_delta in deltas],
[min_y + y_delta * height for y_delta in deltas],
[min_x + x_delta * width for x_delta in deltas],
)
)
queries = [f"min_x == {x} & min_y == {y}" for y, x in y_and_x]
Expand All @@ -562,12 +571,15 @@ def get_empty_square():
# split them into rows
per_row = len(deltas)
images = [
[get_path(x[0], dim=x[1]) if x[0] else get_empty_square() for x in lst]
[
get_path(x[0], dim=x[1]) if x[0] else get_empty_square((width, height))
for x in lst
]
for lst in array_split(image_list, per_row)
]

total_width = (2 * self.surrounding + 1) * self.patch_width
total_height = (2 * self.surrounding + 1) * self.patch_height
total_width = (2 * self.surrounding + 1) * width
total_height = (2 * self.surrounding + 1) * height

context_image = Image.new("RGB", (total_width, total_height))

Expand All @@ -576,8 +588,8 @@ def get_empty_square():
x_offset = 0
for image in row:
context_image.paste(image, (x_offset, y_offset))
x_offset += self.patch_width
y_offset += self.patch_height
x_offset += width
y_offset += height

if self.resize_to is not None:
context_image = ImageOps.contain(
Expand Down Expand Up @@ -667,21 +679,12 @@ def _next_example(self, *_) -> tuple[int, int, str]:
Tuple[int, int, str]
Previous index, current index, and path of the current image.
"""
if not len(self._queue):
if self.current_index == len(self._queue):
self.render_complete()
return

if isinstance(self.current_index, type(None)) or self.current_index == -1:
self.current_index = 0
else:
current_index = self.current_index + 1

try:
self._queue[current_index]
self.previous_index = self.current_index
self.current_index = current_index
except IndexError:
pass
self.previous_index = self.current_index
self.current_index += 1

ix = self._queue[self.current_index]

Expand All @@ -699,21 +702,13 @@ def _prev_example(self, *_) -> tuple[int, int, str]:
Tuple[int, int, str]
Previous index, current index, and path of the current image.
"""
if not len(self._queue):
if self.current_index == len(self._queue):
self.render_complete()
return

current_index = self.current_index - 1

if current_index < 0:
current_index = 0

try:
self._queue[current_index]
self.previous_index = current_index - 1
self.current_index = current_index
except IndexError:
pass
if self.current_index > 0:
self.previous_index = self.current_index
self.current_index -= 1

ix = self._queue[self.current_index]

Expand All @@ -738,7 +733,6 @@ def render(self) -> None:
self.render_complete()
return

# ix = self.iloc[self.current_index].name
ix = self._queue[self.current_index]

# render buttons
Expand Down Expand Up @@ -832,7 +826,6 @@ def _add_annotation(self, annotation: str) -> None:
# ix = self.iloc[self.current_index].name
ix = self._queue[self.current_index]
self.at[ix, self.label_col] = annotation
self.at[ix, "changed"] = True
if self.auto_save:
self._auto_save()
self._next_example()
Expand Down

0 comments on commit cf91abb

Please sign in to comment.