Skip to content

Commit 6b80d74

Browse files
Merge pull request #627 from ibi-group/feature/DT-521-manage-feed-versions
Delete obsolete feed versions
2 parents 5fe046c + 6a27a59 commit 6b80d74

File tree

5 files changed

+233
-42
lines changed

5 files changed

+233
-42
lines changed

src/main/java/com/conveyal/datatools/manager/DataSanitizer.java

Lines changed: 143 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
package com.conveyal.datatools.manager;
22

3-
import com.conveyal.datatools.common.utils.aws.CheckedAWSException;
3+
import com.conveyal.datatools.manager.models.FeedSource;
44
import com.conveyal.datatools.manager.models.FeedVersion;
5+
import com.conveyal.datatools.manager.models.FeedVersionSummary;
6+
import com.conveyal.datatools.manager.models.Project;
57
import com.conveyal.datatools.manager.persistence.FeedStore;
68
import com.conveyal.datatools.manager.persistence.Persistence;
7-
import com.conveyal.gtfs.GTFS;
89
import com.conveyal.gtfs.util.InvalidNamespaceException;
10+
import com.mongodb.client.model.Sorts;
911
import com.conveyal.gtfs.util.Util;
1012
import com.google.common.collect.Lists;
1113
import com.mongodb.client.model.Projections;
@@ -26,20 +28,23 @@
2628
import java.sql.PreparedStatement;
2729
import java.sql.ResultSet;
2830
import java.sql.SQLException;
31+
import java.util.Collection;
2932
import java.util.ArrayList;
33+
import java.util.Collections;
3034
import java.util.HashSet;
3135
import java.util.List;
3236
import java.util.Set;
3337
import java.util.stream.Collectors;
3438

3539
import static com.conveyal.datatools.manager.DataManager.GTFS_DATA_SOURCE;
3640
import static com.conveyal.datatools.manager.DataManager.initializeApplication;
41+
import static com.mongodb.client.model.Filters.eq;
3742
import static com.mongodb.client.model.Aggregates.project;
3843
import static com.mongodb.client.model.Filters.nin;
3944

4045
/**
41-
* The Data sanitizer requires the env.yml and server.yml files for configuration. Data sanitizer specific command-line parameters
42-
* should be provided after these e.g.:
46+
* The Data sanitizer requires the env.yml and server.yml files for configuration. Data sanitizer specific command-line
47+
* parameters should be provided after these e.g.:
4348
* configurations/test/env.yml.tmp configurations/test/server.yml.tmp --orphaned delete (or -O d)
4449
*/
4550
public class DataSanitizer {
@@ -56,23 +61,44 @@ public static void main(String[] args) throws IOException {
5661
*/
5762
public static void parseArguments(String[] arguments) {
5863
Options options = new Options();
59-
Option orphanedOption = Option.builder("O")
64+
Option orphaned = Option.builder("O")
6065
.longOpt("orphaned")
6166
.desc("Optional delete command for orphaned items")
6267
.optionalArg(true)
6368
.argName("deleteCommand")
6469
.build();
65-
options.addOption(orphanedOption);
70+
Option feedVersionAudit = Option.builder("A")
71+
.longOpt("audit")
72+
.desc("Command for feed version audit")
73+
.hasArg(false)
74+
.build();
75+
Option purge = Option.builder("P")
76+
.longOpt("purge-feed-versions")
77+
.desc("Command for purging all but the latest feed version for a feed source")
78+
.hasArg(true)
79+
.argName("feedSourceId")
80+
.build();
81+
options.addOption(orphaned);
82+
options.addOption(feedVersionAudit);
83+
options.addOption(purge);
6684

6785
try {
6886
CommandLineParser parser = new DefaultParser();
6987
CommandLine cmd = parser.parse(options, arguments);
7088
if (cmd.hasOption("O")) {
7189
String deleteCommand = cmd.getOptionValue("O");
7290
boolean delete = "delete".equalsIgnoreCase(deleteCommand) || "d".equalsIgnoreCase(deleteCommand);
73-
sanitizeFeedVersions(delete);
91+
sanitizeOrphanedFeedVersions(delete);
7492
sanitizeDBSchemas(delete);
7593
}
94+
if (cmd.hasOption("A")) {
95+
feedVersionAudit();
96+
}
97+
if (cmd.hasOption("P")) {
98+
String feedSourceId = cmd.getOptionValue("P");
99+
System.out.println("Purge command received for feed source id: " + feedSourceId);
100+
deleteObsoleteFeedVersions(feedSourceId, true);
101+
}
76102
} catch (ParseException e) {
77103
System.out.println(e.getMessage());
78104
HelpFormatter formatter = new HelpFormatter();
@@ -84,7 +110,7 @@ public static void parseArguments(String[] arguments) {
84110
/**
85111
* Group orphaned feed versions and optionally delete.
86112
*/
87-
public static int sanitizeFeedVersions(boolean delete) {
113+
public static int sanitizeOrphanedFeedVersions(boolean delete) {
88114
List<FeedVersion> feedVersions = getOrphanedFeedVersions();
89115
int orphaned = feedVersions.size();
90116
if (orphaned == 0) {
@@ -116,6 +142,91 @@ private static boolean hasGTFSPlus(FeedVersion feedVersion, FeedStore gtfsPlusSt
116142
return DataManager.isModuleEnabled("gtfsplus") && gtfsPlusStore.getFeed(feedVersion.id + ".db") != null;
117143
}
118144

145+
/**
146+
* For a given feed source, delete all feed versions keeping just the latest.
147+
*/
148+
public static void deleteObsoleteFeedVersions(String feedSourceId, boolean sourceIsCli) {
149+
deleteObsoleteFeedVersions(feedSourceId, 1, sourceIsCli);
150+
}
151+
152+
/**
153+
* For a given feed source, delete feed version prior to the keep number.
154+
*/
155+
public static int deleteObsoleteFeedVersions(
156+
String feedSourceId,
157+
int numberOfVersionsToKeep,
158+
boolean sourceIsCli
159+
) {
160+
Collection<FeedVersion> feedVersions = Persistence.feedVersions.getFiltered(
161+
eq("feedSourceId", feedSourceId),
162+
Sorts.descending("version")
163+
);
164+
if (feedVersions.isEmpty() || numberOfVersionsToKeep >= feedVersions.size()) {
165+
String message = "No feed versions or none that qualify for deletion. Feed source id: " + feedSourceId;
166+
LOG.info(message);
167+
if (sourceIsCli) System.out.println(message);
168+
return -1;
169+
}
170+
171+
int keepCount = 0;
172+
int deleteCount = 0;
173+
174+
for (FeedVersion feedVersion : feedVersions) {
175+
if (keepCount < numberOfVersionsToKeep) {
176+
keepCount++;
177+
} else {
178+
feedVersion.delete();
179+
deleteCount++;
180+
}
181+
}
182+
String message = String.format("Deleted %s feed versions from feed source id: %s", deleteCount, feedSourceId);
183+
LOG.info(message);
184+
if (sourceIsCli) System.out.println(message);
185+
return deleteCount;
186+
}
187+
188+
/**
189+
* Group feed source and number of feed versions.
190+
*/
191+
public static List<FeedVersionAudit> feedVersionAudit() {
192+
System.out.println("Producing feed version audit...");
193+
List<FeedVersionAudit> audit = new ArrayList<>();
194+
195+
List<Project> projects = Persistence.projects.getAll();
196+
197+
for (Project project : projects) {
198+
Collection<FeedSource> feedSources = project.retrieveProjectFeedSources();
199+
200+
for (FeedSource feedSource : feedSources) {
201+
Collection<FeedVersionSummary> feedVersions = feedSource.retrieveFeedVersionSummaries();
202+
audit.add(new FeedVersionAudit(project.name, feedSource.name, feedSource.id, feedVersions.size()));
203+
}
204+
}
205+
206+
Collections.sort(audit);
207+
208+
if (!audit.isEmpty()) {
209+
boolean hasHeader = false;
210+
for (FeedVersionAudit feedVersionAudit : audit) {
211+
if (!hasHeader) {
212+
System.out.printf("%-40s | %-40s | %-40s | %s%n", "Project", "Feed Source", "Feed Source Id", "No. Feed Versions");
213+
hasHeader = true;
214+
}
215+
System.out.printf(
216+
"%-40s | %-40s | %-40s | %s%n",
217+
feedVersionAudit.projectName,
218+
feedVersionAudit.feedSourceName,
219+
feedVersionAudit.feedSourceId,
220+
feedVersionAudit.numberOfFeedVersions
221+
);
222+
}
223+
} else {
224+
System.out.println("No feed versions to audit!");
225+
}
226+
System.out.println("Feed version audit complete.");
227+
return audit;
228+
}
229+
119230
/**
120231
* Group orphaned schemas and optionally delete.
121232
*/
@@ -142,13 +253,9 @@ public static void sanitizeDBSchemas(boolean delete) {
142253
private static int deleteOrphanedFeedVersions(List<FeedVersion> feedVersions) {
143254
int deletedFeedVersions = 0;
144255
for (FeedVersion feedVersion : feedVersions) {
145-
try {
146-
System.out.println("Deleting orphaned feed version: " + feedVersion.id);
147-
feedVersion.deleteOrphan();
148-
deletedFeedVersions++;
149-
} catch (SQLException | CheckedAWSException | InvalidNamespaceException e) {
150-
System.err.printf("Failed to delete feed version: %s. %s%n", feedVersion.id, e.getMessage());
151-
}
256+
System.out.println("Deleting orphaned feed version: " + feedVersion.id);
257+
feedVersion.deleteOrphan();
258+
deletedFeedVersions++;
152259
}
153260
return deletedFeedVersions;
154261
}
@@ -159,12 +266,7 @@ private static int deleteOrphanedFeedVersions(List<FeedVersion> feedVersions) {
159266
public static int deleteOrphanedDBSchemas(Set<String> orphanedSchemas) {
160267
int deletedSchemas = 0;
161268
for (String orphanedSchema : orphanedSchemas) {
162-
try {
163-
GTFS.delete(orphanedSchema, DataManager.GTFS_DATA_SOURCE);
164-
LOG.info("Dropped orphaned DB schema from Postgres.");
165-
} catch (SQLException | InvalidNamespaceException e) {
166-
System.err.printf("Failed to delete DB schema: %s. %s%n", orphanedSchema, e.getMessage());
167-
}
269+
FeedVersion.deleteDBSchema(orphanedSchema);
168270
deletedSchemas++;
169271
}
170272
return deletedSchemas;
@@ -192,7 +294,6 @@ public static Set<String> getOrphanedDBSchemas(Set<String> associatedSchemas) {
192294
Set<String> orphanedSchemas = new HashSet<>();
193295
try (Connection connection = GTFS_DATA_SOURCE.getConnection()) {
194296
String sql = String.format("SELECT nspname FROM pg_namespace %s", whereClause);
195-
LOG.info(sql);
196297
PreparedStatement preparedStatement = connection.prepareStatement(sql);
197298
ResultSet resultSet = preparedStatement.executeQuery();
198299
while (resultSet.next()) {
@@ -240,4 +341,24 @@ public static Set<String> getFieldFromDocument(String field, String document) {
240341
}
241342
return fields;
242343
}
344+
345+
public static class FeedVersionAudit implements Comparable<FeedVersionAudit> {
346+
public final String projectName;
347+
public final String feedSourceName;
348+
public final String feedSourceId;
349+
public final int numberOfFeedVersions;
350+
351+
FeedVersionAudit(String projectName, String feedSourceName, String feedSourceId, int numberOfFeedVersions) {
352+
this.projectName = projectName;
353+
this.feedSourceName = feedSourceName;
354+
this.feedSourceId = feedSourceId;
355+
this.numberOfFeedVersions = numberOfFeedVersions;
356+
}
357+
358+
@Override
359+
public int compareTo(FeedVersionAudit other) {
360+
// Sort by numberOfFeedVersions in ascending order
361+
return Integer.compare(other.numberOfFeedVersions, this.numberOfFeedVersions);
362+
}
363+
}
243364
}

src/main/java/com/conveyal/datatools/manager/models/FeedVersion.java

Lines changed: 40 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -609,10 +609,8 @@ public void delete() {
609609
}
610610
ensurePublishedVersionIdIsUnset(fs);
611611

612-
feedStore.deleteFeed(id);
613-
// Delete feed version tables in GTFS database
614-
GTFS.delete(this.namespace, DataManager.GTFS_DATA_SOURCE);
615-
LOG.info("Dropped version's GTFS tables from Postgres.");
612+
deleteFeedVersionFile();
613+
deleteDBSchema(namespace);
616614
// Remove this FeedVersion from all Deployments associated with this FeedVersion's FeedSource's Project
617615
// TODO TEST THOROUGHLY THAT THIS UPDATE EXPRESSION IS CORRECT
618616
// Although outright deleting the feedVersion from deployments could be surprising and shouldn't be done anyway.
@@ -633,18 +631,49 @@ public void delete() {
633631
/**
634632
* Delete resources related to this orphaned feed version. Then delete the orphaned feed version.
635633
*/
636-
public void deleteOrphan() throws SQLException, InvalidNamespaceException, CheckedAWSException {
637-
feedStore.deleteFeed(id);
638-
GTFS.delete(namespace, DataManager.GTFS_DATA_SOURCE);
639-
LOG.info("Dropped feed version's GTFS tables from Postgres.");
634+
public void deleteOrphan() {
635+
deleteFeedVersionFile();
636+
deleteDBSchema(namespace);
640637
if (DataManager.isModuleEnabled("gtfsplus")) {
641-
FeedStore gtfsPlusStore = new FeedStore(DataManager.GTFS_PLUS_SUBDIR);
642-
gtfsPlusStore.deleteFeed(id + ".db");
643-
gtfsPlusStore.deleteFeed(id + ".db.p");
638+
try {
639+
FeedStore gtfsPlusStore = new FeedStore(DataManager.GTFS_PLUS_SUBDIR);
640+
gtfsPlusStore.deleteFeed(id + ".db");
641+
gtfsPlusStore.deleteFeed(id + ".db.p");
642+
} catch (CheckedAWSException e) {
643+
LOG.error("Failed to delete GTFS+ files.");
644+
}
644645
}
645646
Persistence.feedVersions.removeById(id);
646647
}
647648

649+
/**
650+
* Delete the feed version file related to this feed version.
651+
*/
652+
private void deleteFeedVersionFile() {
653+
try {
654+
feedStore.deleteFeed(id);
655+
} catch (CheckedAWSException e) {
656+
LOG.error("Failed to delete feed version file.");
657+
}
658+
}
659+
660+
/**
661+
* Delete the database schema related to this feed version. Catch all related errors to prevent follow on tasks from
662+
* being skipped.
663+
*/
664+
public static void deleteDBSchema(String schema) {
665+
if (schema == null) {
666+
LOG.warn("Schema is null! Unable to delete feed version's GTFS schema from Postgres.");
667+
return;
668+
}
669+
try {
670+
GTFS.delete(schema, DataManager.GTFS_DATA_SOURCE);
671+
LOG.info("Deleted feed version's GTFS schema from Postgres.");
672+
} catch (SQLException | InvalidNamespaceException e) {
673+
LOG.error("Failed to delete feed version's GTFS schema from Postgres.");
674+
}
675+
}
676+
648677
/**
649678
* If this feed version is referenced in the parent feed source by publishedVersionId,
650679
* ensure that the field is set to null.

0 commit comments

Comments
 (0)