-
Notifications
You must be signed in to change notification settings - Fork 1
/
main_matisse_kmeans.py
83 lines (67 loc) · 3.19 KB
/
main_matisse_kmeans.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import skimage
from tqdm import tqdm
from scipy.spatial.distance import cdist
import matplotlib.pyplot as plt
from src.analysis.occurences_per_year import occurences_per_year
from src.commons.utils import *
import src.scraper.custom_image_pipeline as custom_image_pipeline
from src.preprocessing import preprocessing
from src.visualization import visualization
from src.analysis.colors_clustering_kmeans import *
from src.analysis.occurences_per_year import *
from src.scraper.scraper_matisse import *
import numpy as np
import pandas as pd
from src.constants import *
from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN
from pathlib import Path
if __name__ == "__main__":
process.crawl(PaintingsSpider)
process.start() # the script will block here until the crawling is finished
print('hello')
Path(DATA_FOLDER).mkdir(parents=True, exist_ok=True)
Path(IMAGE_FOLDER).mkdir(parents=True, exist_ok=True)
Path(IMAGE_RESHAPED).mkdir(parents=True, exist_ok=True)
Path(OUTPUT_FOLDER).mkdir(parents=True, exist_ok=True)
data = pd.read_csv('{}/data.csv'.format(DATA_FOLDER))
preprocessing.resize_images(data, size=(200,200))
N_CLUSTERS = 20
# Stack images
stacked_images = None
fitted_model = None
for idx, row in data.iterrows():
img = utils.load_image('{}/{}.jpg'.format(IMAGE_RESHAPED, row.title))
reshaped = img.reshape((-1, 3))
if stacked_images is None:
stacked_images = reshaped
#break
else:
stacked_images = np.concatenate((stacked_images, reshaped))
fitted_model = cluster_colors_rgb(stacked_images, N_CLUSTERS,
filename='{}/colors_{}.jpg'.format(OUTPUT_FOLDER, 'total'))
# Predict clusters
for year in data['year'].unique():
stacked_images = None
for idx, row in data[data['year'] == year].iterrows():
img = utils.load_image('{}/{}.jpg'.format(IMAGE_RESHAPED, row.title))
reshaped = img.reshape((-1, 3))
if stacked_images is None:
stacked_images = reshaped
else:
stacked_images = np.concatenate((stacked_images, reshaped))
generate_color_scheme_images(stacked_images, fitted_model, filename='{}/colors_{}'.format(OUTPUT_FOLDER, year))
# Predict clusters
for idx, row in data.iterrows():
stacked_images = None
img = utils.load_image('{}/{}.jpg'.format(IMAGE_RESHAPED, row.title))
reshaped = img.reshape((-1, 3))
Path('{}/images_schemes'.format(OUTPUT_FOLDER)).mkdir(parents=True, exist_ok=True)
generate_color_scheme_images(reshaped, fitted_model,
filename='{}/images_schemes/{}'.format(OUTPUT_FOLDER, row.title))
for idx, row in data.iterrows():
img = utils.load_image('{}/{}.jpg'.format(IMAGE_RESHAPED, row.title))
reshaped = img.reshape((-1, 3))
Path('{}/clustered_images'.format(OUTPUT_FOLDER)).mkdir(parents=True, exist_ok=True)
generate_clustered_images(img, reshaped, fitted_model,
filename='{}/clustered_images/colors_{}.jpg'.format(OUTPUT_FOLDER, row.title))
occurences_per_year()