1
1
package com .conveyal .datatools .manager ;
2
2
3
- import com .conveyal .datatools .common . utils . aws . CheckedAWSException ;
3
+ import com .conveyal .datatools .manager . models . FeedSource ;
4
4
import com .conveyal .datatools .manager .models .FeedVersion ;
5
+ import com .conveyal .datatools .manager .models .FeedVersionSummary ;
6
+ import com .conveyal .datatools .manager .models .Project ;
5
7
import com .conveyal .datatools .manager .persistence .FeedStore ;
6
8
import com .conveyal .datatools .manager .persistence .Persistence ;
7
- import com .conveyal .gtfs .GTFS ;
8
9
import com .conveyal .gtfs .util .InvalidNamespaceException ;
10
+ import com .mongodb .client .model .Sorts ;
9
11
import com .conveyal .gtfs .util .Util ;
10
12
import com .google .common .collect .Lists ;
11
13
import com .mongodb .client .model .Projections ;
26
28
import java .sql .PreparedStatement ;
27
29
import java .sql .ResultSet ;
28
30
import java .sql .SQLException ;
31
+ import java .util .Collection ;
29
32
import java .util .ArrayList ;
33
+ import java .util .Collections ;
30
34
import java .util .HashSet ;
31
35
import java .util .List ;
32
36
import java .util .Set ;
33
37
import java .util .stream .Collectors ;
34
38
35
39
import static com .conveyal .datatools .manager .DataManager .GTFS_DATA_SOURCE ;
36
40
import static com .conveyal .datatools .manager .DataManager .initializeApplication ;
41
+ import static com .mongodb .client .model .Filters .eq ;
37
42
import static com .mongodb .client .model .Aggregates .project ;
38
43
import static com .mongodb .client .model .Filters .nin ;
39
44
40
45
/**
41
- * The Data sanitizer requires the env.yml and server.yml files for configuration. Data sanitizer specific command-line parameters
42
- * should be provided after these e.g.:
46
+ * The Data sanitizer requires the env.yml and server.yml files for configuration. Data sanitizer specific command-line
47
+ * parameters should be provided after these e.g.:
43
48
* configurations/test/env.yml.tmp configurations/test/server.yml.tmp --orphaned delete (or -O d)
44
49
*/
45
50
public class DataSanitizer {
@@ -56,23 +61,44 @@ public static void main(String[] args) throws IOException {
56
61
*/
57
62
public static void parseArguments (String [] arguments ) {
58
63
Options options = new Options ();
59
- Option orphanedOption = Option .builder ("O" )
64
+ Option orphaned = Option .builder ("O" )
60
65
.longOpt ("orphaned" )
61
66
.desc ("Optional delete command for orphaned items" )
62
67
.optionalArg (true )
63
68
.argName ("deleteCommand" )
64
69
.build ();
65
- options .addOption (orphanedOption );
70
+ Option feedVersionAudit = Option .builder ("A" )
71
+ .longOpt ("audit" )
72
+ .desc ("Command for feed version audit" )
73
+ .hasArg (false )
74
+ .build ();
75
+ Option purge = Option .builder ("P" )
76
+ .longOpt ("purge-feed-versions" )
77
+ .desc ("Command for purging all but the latest feed version for a feed source" )
78
+ .hasArg (true )
79
+ .argName ("feedSourceId" )
80
+ .build ();
81
+ options .addOption (orphaned );
82
+ options .addOption (feedVersionAudit );
83
+ options .addOption (purge );
66
84
67
85
try {
68
86
CommandLineParser parser = new DefaultParser ();
69
87
CommandLine cmd = parser .parse (options , arguments );
70
88
if (cmd .hasOption ("O" )) {
71
89
String deleteCommand = cmd .getOptionValue ("O" );
72
90
boolean delete = "delete" .equalsIgnoreCase (deleteCommand ) || "d" .equalsIgnoreCase (deleteCommand );
73
- sanitizeFeedVersions (delete );
91
+ sanitizeOrphanedFeedVersions (delete );
74
92
sanitizeDBSchemas (delete );
75
93
}
94
+ if (cmd .hasOption ("A" )) {
95
+ feedVersionAudit ();
96
+ }
97
+ if (cmd .hasOption ("P" )) {
98
+ String feedSourceId = cmd .getOptionValue ("P" );
99
+ System .out .println ("Purge command received for feed source id: " + feedSourceId );
100
+ deleteObsoleteFeedVersions (feedSourceId , true );
101
+ }
76
102
} catch (ParseException e ) {
77
103
System .out .println (e .getMessage ());
78
104
HelpFormatter formatter = new HelpFormatter ();
@@ -84,7 +110,7 @@ public static void parseArguments(String[] arguments) {
84
110
/**
85
111
* Group orphaned feed versions and optionally delete.
86
112
*/
87
- public static int sanitizeFeedVersions (boolean delete ) {
113
+ public static int sanitizeOrphanedFeedVersions (boolean delete ) {
88
114
List <FeedVersion > feedVersions = getOrphanedFeedVersions ();
89
115
int orphaned = feedVersions .size ();
90
116
if (orphaned == 0 ) {
@@ -116,6 +142,91 @@ private static boolean hasGTFSPlus(FeedVersion feedVersion, FeedStore gtfsPlusSt
116
142
return DataManager .isModuleEnabled ("gtfsplus" ) && gtfsPlusStore .getFeed (feedVersion .id + ".db" ) != null ;
117
143
}
118
144
145
+ /**
146
+ * For a given feed source, delete all feed versions keeping just the latest.
147
+ */
148
+ public static void deleteObsoleteFeedVersions (String feedSourceId , boolean sourceIsCli ) {
149
+ deleteObsoleteFeedVersions (feedSourceId , 1 , sourceIsCli );
150
+ }
151
+
152
+ /**
153
+ * For a given feed source, delete feed version prior to the keep number.
154
+ */
155
+ public static int deleteObsoleteFeedVersions (
156
+ String feedSourceId ,
157
+ int numberOfVersionsToKeep ,
158
+ boolean sourceIsCli
159
+ ) {
160
+ Collection <FeedVersion > feedVersions = Persistence .feedVersions .getFiltered (
161
+ eq ("feedSourceId" , feedSourceId ),
162
+ Sorts .descending ("version" )
163
+ );
164
+ if (feedVersions .isEmpty () || numberOfVersionsToKeep >= feedVersions .size ()) {
165
+ String message = "No feed versions or none that qualify for deletion. Feed source id: " + feedSourceId ;
166
+ LOG .info (message );
167
+ if (sourceIsCli ) System .out .println (message );
168
+ return -1 ;
169
+ }
170
+
171
+ int keepCount = 0 ;
172
+ int deleteCount = 0 ;
173
+
174
+ for (FeedVersion feedVersion : feedVersions ) {
175
+ if (keepCount < numberOfVersionsToKeep ) {
176
+ keepCount ++;
177
+ } else {
178
+ feedVersion .delete ();
179
+ deleteCount ++;
180
+ }
181
+ }
182
+ String message = String .format ("Deleted %s feed versions from feed source id: %s" , deleteCount , feedSourceId );
183
+ LOG .info (message );
184
+ if (sourceIsCli ) System .out .println (message );
185
+ return deleteCount ;
186
+ }
187
+
188
+ /**
189
+ * Group feed source and number of feed versions.
190
+ */
191
+ public static List <FeedVersionAudit > feedVersionAudit () {
192
+ System .out .println ("Producing feed version audit..." );
193
+ List <FeedVersionAudit > audit = new ArrayList <>();
194
+
195
+ List <Project > projects = Persistence .projects .getAll ();
196
+
197
+ for (Project project : projects ) {
198
+ Collection <FeedSource > feedSources = project .retrieveProjectFeedSources ();
199
+
200
+ for (FeedSource feedSource : feedSources ) {
201
+ Collection <FeedVersionSummary > feedVersions = feedSource .retrieveFeedVersionSummaries ();
202
+ audit .add (new FeedVersionAudit (project .name , feedSource .name , feedSource .id , feedVersions .size ()));
203
+ }
204
+ }
205
+
206
+ Collections .sort (audit );
207
+
208
+ if (!audit .isEmpty ()) {
209
+ boolean hasHeader = false ;
210
+ for (FeedVersionAudit feedVersionAudit : audit ) {
211
+ if (!hasHeader ) {
212
+ System .out .printf ("%-40s | %-40s | %-40s | %s%n" , "Project" , "Feed Source" , "Feed Source Id" , "No. Feed Versions" );
213
+ hasHeader = true ;
214
+ }
215
+ System .out .printf (
216
+ "%-40s | %-40s | %-40s | %s%n" ,
217
+ feedVersionAudit .projectName ,
218
+ feedVersionAudit .feedSourceName ,
219
+ feedVersionAudit .feedSourceId ,
220
+ feedVersionAudit .numberOfFeedVersions
221
+ );
222
+ }
223
+ } else {
224
+ System .out .println ("No feed versions to audit!" );
225
+ }
226
+ System .out .println ("Feed version audit complete." );
227
+ return audit ;
228
+ }
229
+
119
230
/**
120
231
* Group orphaned schemas and optionally delete.
121
232
*/
@@ -142,13 +253,9 @@ public static void sanitizeDBSchemas(boolean delete) {
142
253
private static int deleteOrphanedFeedVersions (List <FeedVersion > feedVersions ) {
143
254
int deletedFeedVersions = 0 ;
144
255
for (FeedVersion feedVersion : feedVersions ) {
145
- try {
146
- System .out .println ("Deleting orphaned feed version: " + feedVersion .id );
147
- feedVersion .deleteOrphan ();
148
- deletedFeedVersions ++;
149
- } catch (SQLException | CheckedAWSException | InvalidNamespaceException e ) {
150
- System .err .printf ("Failed to delete feed version: %s. %s%n" , feedVersion .id , e .getMessage ());
151
- }
256
+ System .out .println ("Deleting orphaned feed version: " + feedVersion .id );
257
+ feedVersion .deleteOrphan ();
258
+ deletedFeedVersions ++;
152
259
}
153
260
return deletedFeedVersions ;
154
261
}
@@ -159,12 +266,7 @@ private static int deleteOrphanedFeedVersions(List<FeedVersion> feedVersions) {
159
266
public static int deleteOrphanedDBSchemas (Set <String > orphanedSchemas ) {
160
267
int deletedSchemas = 0 ;
161
268
for (String orphanedSchema : orphanedSchemas ) {
162
- try {
163
- GTFS .delete (orphanedSchema , DataManager .GTFS_DATA_SOURCE );
164
- LOG .info ("Dropped orphaned DB schema from Postgres." );
165
- } catch (SQLException | InvalidNamespaceException e ) {
166
- System .err .printf ("Failed to delete DB schema: %s. %s%n" , orphanedSchema , e .getMessage ());
167
- }
269
+ FeedVersion .deleteDBSchema (orphanedSchema );
168
270
deletedSchemas ++;
169
271
}
170
272
return deletedSchemas ;
@@ -192,7 +294,6 @@ public static Set<String> getOrphanedDBSchemas(Set<String> associatedSchemas) {
192
294
Set <String > orphanedSchemas = new HashSet <>();
193
295
try (Connection connection = GTFS_DATA_SOURCE .getConnection ()) {
194
296
String sql = String .format ("SELECT nspname FROM pg_namespace %s" , whereClause );
195
- LOG .info (sql );
196
297
PreparedStatement preparedStatement = connection .prepareStatement (sql );
197
298
ResultSet resultSet = preparedStatement .executeQuery ();
198
299
while (resultSet .next ()) {
@@ -240,4 +341,24 @@ public static Set<String> getFieldFromDocument(String field, String document) {
240
341
}
241
342
return fields ;
242
343
}
344
+
345
+ public static class FeedVersionAudit implements Comparable <FeedVersionAudit > {
346
+ public final String projectName ;
347
+ public final String feedSourceName ;
348
+ public final String feedSourceId ;
349
+ public final int numberOfFeedVersions ;
350
+
351
+ FeedVersionAudit (String projectName , String feedSourceName , String feedSourceId , int numberOfFeedVersions ) {
352
+ this .projectName = projectName ;
353
+ this .feedSourceName = feedSourceName ;
354
+ this .feedSourceId = feedSourceId ;
355
+ this .numberOfFeedVersions = numberOfFeedVersions ;
356
+ }
357
+
358
+ @ Override
359
+ public int compareTo (FeedVersionAudit other ) {
360
+ // Sort by numberOfFeedVersions in ascending order
361
+ return Integer .compare (other .numberOfFeedVersions , this .numberOfFeedVersions );
362
+ }
363
+ }
243
364
}
0 commit comments