
Commit bd1696c

Merge branch 'main' into roicluster
2 parents ff9a29d + 76ec895 commit bd1696c

File tree

4 files changed: +22 -36 lines


examples/sahi_detection.ipynb

+6 -25
@@ -4,10 +4,7 @@
    "cell_type": "markdown",
    "id": "5fec46c020b895a",
    "metadata": {
-    "collapsed": false,
-    "jupyter": {
-     "outputs_hidden": false
-    }
+    "collapsed": false
    },
    "source": [
     "# Welcome to the SAHI detection notebook. \n",
@@ -19,10 +16,7 @@
    "cell_type": "markdown",
    "id": "3ce3d68ddd33bd3c",
    "metadata": {
-    "collapsed": false,
-    "jupyter": {
-     "outputs_hidden": false
-    }
+    "collapsed": false
    },
    "source": [
     "# First, import needed dependencies"
@@ -37,7 +31,6 @@
    "source": [
     "import sys\n",
     "sys.path.append('..')\n",
-    "from pathlib import Path\n",
     "from huggingface_hub import hf_hub_download\n",
     "from pathlib import Path\n",
     "from sdcat.detect.sahi_detector import run_sahi_detect\n",
@@ -48,10 +41,7 @@
    "cell_type": "markdown",
    "id": "a366e55bf59e7a79",
    "metadata": {
-    "collapsed": false,
-    "jupyter": {
-     "outputs_hidden": false
-    }
+    "collapsed": false
    },
    "source": [
     "# Run detection\n",
@@ -65,10 +55,7 @@
    "id": "9f9dd3f8-c886-4d32-a203-5204a343e721",
    "metadata": {
     "collapsed": false,
-    "is_executing": true,
-    "jupyter": {
-     "outputs_hidden": false
-    }
+    "is_executing": true
    },
    "outputs": [
    {
@@ -172,10 +159,7 @@
    "cell_type": "markdown",
    "id": "3bda2110765d92e2",
    "metadata": {
-    "collapsed": false,
-    "jupyter": {
-     "outputs_hidden": false
-    }
+    "collapsed": false
    },
    "source": [
     "# Results"
@@ -185,10 +169,7 @@
    "cell_type": "markdown",
    "id": "ae874cee7b4e83a9",
    "metadata": {
-    "collapsed": false,
-    "jupyter": {
-     "outputs_hidden": false
-    }
+    "collapsed": false
    },
    "source": [
     "Results are saved to the **csv_out_path**. Let's take a look at the first few rows of the dataframe."

sdcat/cluster/cluster.py

+11 -6
@@ -10,14 +10,19 @@
 import json
 import seaborn as sns
 import numpy as np
-from matplotlib import pyplot as plt
 from umap import UMAP
 from hdbscan import HDBSCAN
 from sklearn.metrics.pairwise import cosine_similarity
+from sklearn.preprocessing import MinMaxScaler
 from sdcat.logger import info, warn, debug, err
 from sdcat.cluster.utils import cluster_grid, crop_square_image, square_image
 from sdcat.cluster.embedding import fetch_embedding, has_cached_embedding, compute_norm_embedding
 
+if find_spec("multicore_tsne"):
+    from multicore_tsne import MulticoreTSNE as TSNE
+else:
+    from sklearn.manifold import TSNE
+
 if find_spec("cuml"):
     info('=======> USING GPU for HDBSCAN AND UMAP <=========')
     from cuml.cluster import HDBSCAN as cuHDBSCAN  # pylint: disable=E0611, E0401
@@ -75,10 +80,9 @@ def _run_hdbscan_assign(
     # Get the number of samples which is the number of rows in the dataframe - this is used mostly for calculating coverage
     num_samples = df.shape[0]
 
-    # Compute the cosine similarity matrix
-    cosine_sim_matrix = cosine_similarity(df.values)
-    distance_matrix = 1 - cosine_sim_matrix
-    x = distance_matrix.astype(np.float64)
+    tsne = TSNE(n_components=2, perplexity=40, metric="cosine", n_jobs=8, random_state=42, verbose=True)
+    embedding = tsne.fit_transform(df.values)
+    x = MinMaxScaler().fit_transform(embedding)  # scale the embedding to 0-1
 
     # Cluster the embeddings using HDBSCAN
     if have_gpu:
@@ -93,7 +97,7 @@ def _run_hdbscan_assign(
         labels = scan.fit_predict(x)
     else:
         scan = HDBSCAN(
-            metric='precomputed',
+            metric='l2',
             allow_single_cluster=True,
             min_cluster_size=min_cluster_size,
             min_samples=min_samples,
@@ -206,6 +210,7 @@ def _run_hdbscan_assign(
     df = pd.DataFrame({'x': xx[clustered, 0], 'y': xx[clustered, 1], 'labels': labels[clustered]})
     p = sns.jointplot(data=df, x='x', y='y', hue='labels')
     p.savefig(f"{out_path}/{prefix}_summary.png")
+    info(f"Saved {out_path}/{prefix}_summary.png")
 
     with open(f'{out_path}/{prefix}_summary.json', 'w') as f:
         json.dump(params, f)
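For context, the change above replaces the precomputed cosine-distance matrix with a t-SNE projection of the embeddings that is rescaled to 0-1 and then clustered with an l2 metric. Below is a minimal, self-contained sketch of that flow; the random input and the min_cluster_size/min_samples values are placeholders, not the project defaults.

# Sketch of the new clustering path: t-SNE reduction -> 0-1 rescaling -> HDBSCAN on l2.
import numpy as np
from sklearn.manifold import TSNE
from sklearn.preprocessing import MinMaxScaler
from hdbscan import HDBSCAN

rng = np.random.default_rng(42)
features = rng.normal(size=(200, 512))       # stand-in for the image embeddings, one row per detection

tsne = TSNE(n_components=2, perplexity=40, metric="cosine", random_state=42)
embedding = tsne.fit_transform(features)     # project the embeddings to 2-D
x = MinMaxScaler().fit_transform(embedding)  # scale the projection to the 0-1 range

scan = HDBSCAN(metric='l2', allow_single_cluster=True, min_cluster_size=2, min_samples=1)
labels = scan.fit_predict(x)                 # -1 marks noise, values >= 0 are cluster ids
print(f"{labels.max() + 1} clusters, {np.sum(labels == -1)} unclustered samples")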

sdcat/cluster/commands.py

+1 -4
@@ -260,8 +260,7 @@ def is_day(utc_dt):
         shutil.copy(Path(config_ini), save_dir / f'{prefix}_config.ini')
     else:
         warn(f'No detections found to cluster')
-
-
+
 @click.command('roi', help='Cluster roi. See cluster --config-ini to override cluster defaults.')
 @common_args.config_ini
 @click.option('--roi-dir', help='Input folder(s) with raw ROI images', multiple=True)
@@ -272,8 +271,6 @@ def is_day(utc_dt):
 @click.option('--min-cluster-size', help='The minimum number of samples in a group for that group to be considered a cluster. Default is 2. Increase for less conservative clustering, e.g. 5, 15', type=int)
 def run_cluster_roi(roi_dir, save_dir, device, config_ini, alpha, cluster_selection_epsilon, min_cluster_size):
     config = cfg.Config(config_ini)
-    max_area = int(config('cluster', 'max_area'))
-    min_area = int(config('cluster', 'min_area'))
     min_samples = int(config('cluster', 'min_samples'))
     alpha = alpha if alpha else float(config('cluster', 'alpha'))
     min_cluster_size = min_cluster_size if min_cluster_size else int(config('cluster', 'min_cluster_size'))
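The remaining assignments in run_cluster_roi follow a flag-or-config fallback: a CLI option wins when given, otherwise the value comes from the cluster section of the .ini file. A reduced sketch of that pattern is below, using configparser as a stand-in for sdcat's own cfg.Config; the option names mirror the diff but the rest is illustrative only.

# Sketch of the option-vs-config fallback used by the roi command.
import configparser
import click

@click.command('roi')
@click.option('--config-ini', default='config.ini')
@click.option('--alpha', type=float)
@click.option('--min-cluster-size', type=int)
def run_cluster_roi(config_ini, alpha, min_cluster_size):
    config = configparser.ConfigParser()
    config.read(config_ini)
    # CLI value takes precedence; otherwise fall back to the [cluster] section of the ini file
    alpha = alpha if alpha else float(config['cluster']['alpha'])
    min_cluster_size = min_cluster_size if min_cluster_size else int(config['cluster']['min_cluster_size'])
    click.echo(f'alpha={alpha} min_cluster_size={min_cluster_size}')

if __name__ == '__main__':
    run_cluster_roi()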

sdcat/cluster/embedding.py

+4 -1
@@ -11,6 +11,7 @@
 from numpy import save, load
 import numpy as np
 from sahi.utils.torch import torch
+from torchvision import transforms as pth_transforms
 import torch.nn as nn
 import cv2
 from sdcat.logger import info, err
@@ -119,8 +120,10 @@ def compute_embedding(images: list, model_name: str):
 
         image = np.array(square_img)
 
-        # Convert the image to a tensor
+        norm_transform = pth_transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
         img_tensor = torch.from_numpy(image).permute(2, 0, 1).float() / 255.0
+        # Normalize the tensor with the mean and std of the ImageNet dataset
+        img_tensor = norm_transform(img_tensor)
         img_tensor = img_tensor.unsqueeze(0)  # Add batch dimension
         if 'cuda' in device:
             img_tensor = img_tensor.to(device)
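The added lines normalize the tensor with the ImageNet channel statistics after scaling it to the 0-1 range. A small standalone sketch of that preprocessing step follows; the 224x224 random image is just a placeholder for square_img.

# Sketch of the preprocessing change in compute_embedding(): ImageNet normalization before inference.
import numpy as np
import torch
from torchvision import transforms as pth_transforms

image = np.random.randint(0, 256, size=(224, 224, 3), dtype=np.uint8)  # stand-in for square_img

norm_transform = pth_transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
img_tensor = torch.from_numpy(image).permute(2, 0, 1).float() / 255.0  # HWC uint8 -> CHW float in [0, 1]
img_tensor = norm_transform(img_tensor)                                # per-channel ImageNet normalization
img_tensor = img_tensor.unsqueeze(0)                                   # add batch dimension
print(img_tensor.shape)  # torch.Size([1, 3, 224, 224])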
