forked from Molmed/sisyphus
-
Notifications
You must be signed in to change notification settings - Fork 0
/
cleanSweStore.pl
200 lines (161 loc) · 5.75 KB
/
cleanSweStore.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
#!/usr/bin/perl -w
##
use Getopt::Long;
use Pod::Usage;
use strict;
use warnings;
=pod
=head1 NAME
cleanSweStore.pl - clean swestore from old projects
=head1 SYNOPSIS
cleanSweStore.pl -help|-man
cleanSweStore.pl -projectFile <file with a list of projects> [-execute] [-unaligned]
=head1 OPTIONS
=over 4
=item -h|-help
prints out a brief help text.
=item -m|-man
Opens the manpage.
=item -projectFile
Path to a file listing all projects that should be removed from SweStore.
Each row must contain two tab-separated columns: the first column is the
runfolder name and the second column is the project ID. If a runfolder contains
several projects that should be removed, add one row per project.
=item -execute
Use flag when you want to perform the deletion. If not set, the script
will only validate that the provided projects can be found and also
specify if the entire runfolder will be deleted or just a subset
of the projects.
=item -unaligned
Try to remove Unaligned folder for the specified flowcells.
=back
=cut
# Command-line state. The option variables stay undefined until GetOptions
# assigns them; $help and $man start out false. $debug is accepted on the
# command line but is not read anywhere in this script.
my $inputProjectFile;
my $debug;
my $execute;
my $removeUnaligned;
# Root collection on SweStore under which the runfolders are looked up.
my $swestorePath = "/ssUppnexZone/proj/a2009002";
my $help = 0;
my $man = 0;
# The project input file is expected to look like this, one project per row
# with a tab between the two columns:
# runfoldername1 projectId1
# runfoldername1 projectId2
# runfoldername2 projectId3
# ...
# runfoldernameN\tprojectIdN
#
my @optionSpec = (
    'help|?' => \$help,
    'man' => \$man,
    'projectFile=s' => \$inputProjectFile,
    'unaligned!' => \$removeUnaligned,
    'execute!' => \$execute,
    'debug' => \$debug,
);
# An unparsable command line gets the terse usage text.
GetOptions(@optionSpec) or pod2usage(-verbose => 0);
if ($help) {
    pod2usage(-verbose => 1);
}
if ($man) {
    pod2usage(-verbose => 2);
}
# The project list is the only mandatory argument.
if (!defined($inputProjectFile)) {
    print "You must provide a list of projects to clean, format: runfoldername\tprojectid\n";
    pod2usage(-verbose => 1);
    exit;
}
#
# Save all runfolder and the associated projects into a hash structure
#
# var => {
# runfolder_name1 => {
# projId1 => 1,
# projId2 => 1,
# },
# runfolder_name2 => {
# projId3 => 1,
# projId4 => 1,
# },
# }
#
# Read the project list into $dataToClean->{runfolder}->{project} = 1.
# Lines starting with '#' are comments. Blank lines and lines without both a
# runfolder and a project column are skipped with a warning instead of
# producing undefined or empty hash keys.
open(my $projectsFh, '<', $inputProjectFile)
    or die "Couldn't open project file: $inputProjectFile: $!";
# Start from an empty hash so an empty project file still yields a valid
# (empty) structure.
my $dataToClean = {};
while (my $line = <$projectsFh>) {
    next if $line =~ /^#/;
    $line =~ s/\r?\n\z//;    # drop the line ending, including CRLF
    next if $line !~ /\S/;   # ignore blank lines
    my ($runfolder, $project) = split(/\t/, $line);
    # Remove surrounding whitespace from the runfolder and all whitespace
    # from the project id so stray blanks cannot end up in the paths that
    # are built from these values.
    if (defined($runfolder)) {
        $runfolder =~ s/^\s+|\s+$//g;
    }
    if (defined($project)) {
        $project =~ s/\s+//g;
    }
    if (!defined($runfolder) || $runfolder eq '' || !defined($project) || $project eq '') {
        warn "Skipping malformed line $. in $inputProjectFile (expected runfoldername<TAB>projectid)\n";
        next;
    }
    $dataToClean->{$runfolder}->{$project} = 1;
}
close($projectsFh);
# The epoch timestamp makes the log file names unique per invocation.
my $timestamp = time;
# Log handles: paths removed, projects that could not be removed, and
# projects still present on SweStore. $NOTFOUND is declared but not used in
# this script.
my ($REMOVED, $NOTREMOVED, $LEFTONSWESTORE, $NOTFOUND);
# Loading the irods module from within the script did not work (reason
# unknown), so it is left disabled; presumably ils/irm must already be
# available in the calling environment.
#qx(module load irods);
print "Cleaning swestore!\n";
# A dry run writes the same three logs, only prefixed with "dryRun.".
my $logPrefix = $execute ? "" : "dryRun.";
my $removedLogName = "${logPrefix}removedFromSweStore.$timestamp.log";
my $notRemovedLogName = "${logPrefix}notRemovedFromSweStore.$timestamp.log";
my $leftOnSweStoreLogName = "${logPrefix}leftOnSweStore.$timestamp.log";
open($REMOVED, '>', $removedLogName)
    or die "Couldn't open output file: $removedLogName: $!\n";
open($NOTREMOVED, '>', $notRemovedLogName)
    or die "Couldn't open output file: $notRemovedLogName: $!\n";
open($LEFTONSWESTORE, '>', $leftOnSweStoreLogName)
    or die "Couldn't open output file: $leftOnSweStoreLogName: $!\n";
# Counters reported in the final summary.
my $counterRemoved = 0;
my $counterNotRemoved = 0;
#
# Remove the provided projects from SweStore
#

# Run an external command without going through a shell, so that characters
# in runfolder or project names are never interpreted by the shell.
# Returns the command's standard output and its raw wait status ($?);
# the status is -1 when the command could not be started at all.
sub _runIrodsCommand {
    my @command = @_;
    my $pid = open(my $pipe, '-|', @command);
    if (!$pid) {
        warn "Couldn't run '@command': $!\n";
        return ('', -1);
    }
    my $output = do { local $/; <$pipe> };
    close($pipe);    # closing a pipe waits for the child and sets $?
    my $status = $?;
    $output = '' unless defined($output);
    return ($output, $status);
}

foreach my $runfolder (sort keys %{ $dataToClean || {} }) { # Process each runfolder
    # The first four digits of the runfolder name (YYMM) select the
    # 20YY-MM collection the runfolder is looked up in.
    my ($year, $month) = ($runfolder =~ m/^(\d{2})(\d{2})(\d{2})_[A-Z0-9-]+_[0-9]+_[A-Z0-9-]+/);
    unless (defined($year) && defined($month)) {
        # Without a parsable name no valid SweStore path can be built, so
        # report every requested project of this runfolder as not removed.
        print("Couldn't parse runfolder name $runfolder, skipping it\n");
        foreach my $key (sort keys %{$dataToClean->{$runfolder}}) {
            $counterNotRemoved++;
            print {$NOTREMOVED} "$runfolder\t$key\n";
        }
        next;
    }
    my $runfolderPath = "$swestorePath/20$year-$month/$runfolder";
    # Find each stored project at SweStore, for the specified runfolder
    my ($projects) = _runIrodsCommand('ils', "$runfolderPath/Projects/");
    #Result from ils
    #
    # ils /ssUppnexZone/proj/a2009002/2014-06/140605_D00118_0144_AC44G7ACXX/Projects/
    # /ssUppnexZone/proj/a2009002/2014-06/140605_D00118_0144_AC44G7ACXX/Projects:
    # C- /ssUppnexZone/proj/a2009002/2014-06/140605_D00118_0144_AC44G7ACXX/Projects/MK-0401
    # C- /ssUppnexZone/proj/a2009002/2014-06/140605_D00118_0144_AC44G7ACXX/Projects/MK-0429
    #
    my %foundProjects;
    # Only extract information from lines containing "C-"
    foreach my $line (split(/\n/, $projects)) {
        next unless $line =~ /^\s*C-/;
        # The project id is the last path component, e.g. MK-0401.
        my ($project) = ($line =~ m/.*\/([A-Z]{2}-?[0-9]{2,4})$/);
        $foundProjects{$project} = 1 if defined($project);
    }
    #Remove projects from SweStore
    foreach my $key (sort keys %{$dataToClean->{$runfolder}}) {
        if (!exists($foundProjects{$key})) {
            print("Couldn't find project $key for $runfolder\n");
            $counterNotRemoved++;
            print {$NOTREMOVED} "$runfolder\t$key\n";
            next;
        }
        my $projectPath = "$runfolderPath/Projects/$key/$runfolder";
        print("Removing project $key from $runfolder\n");
        if ($execute) { #Perform deletion
            my (undef, $status) = _runIrodsCommand('irm', '-rf', $projectPath);
            if ($status != 0) {
                # A failed irm must not be logged as removed; the project
                # stays in %foundProjects and is therefore also listed as
                # left on SweStore.
                print("Failed to remove project $key from $runfolder\n");
                $counterNotRemoved++;
                print {$NOTREMOVED} "$runfolder\t$key\n";
                next;
            }
        }
        print {$REMOVED} "$projectPath\n";
        delete $foundProjects{$key};
        $counterRemoved++;
    }
    # Whatever is still in %foundProjects was not removed (or not requested).
    foreach my $key (sort keys %foundProjects) {
        print {$LEFTONSWESTORE} "$runfolder\t$key\n";
    }
    if ($removeUnaligned) {
        #Check if Unaligned folder exists
        my $unalignedPath = "$runfolderPath/Unaligned";
        # The folder counts as found when the listing contains a "C-"
        # entry whose path starts with the Unaligned path.
        my $regex = "C- $unalignedPath";
        my ($unalignedListing) = _runIrodsCommand('ils', $unalignedPath);
        if ($unalignedListing =~ /\Q$regex/) { #If found remove it
            print "Removing unaligned for runfolder $runfolder\n";
            if ($execute) { #Perform deletion
                my (undef, $status) = _runIrodsCommand('irm', '-rf', $unalignedPath);
                if ($status != 0) {
                    print "Failed to remove unaligned for runfolder $runfolder\n";
                }
            }
        } else { #Warn if the Unaligned folder couldn't be found
            print "Couldn't find unaligned for $runfolder\n";
        }
    }
}
# Buffered write errors only surface when the handle is closed, so report a
# failing close instead of silently ending with incomplete logs.
close($NOTREMOVED) or warn "Couldn't close the not-removed log: $!\n";
close($REMOVED) or warn "Couldn't close the removed log: $!\n";
close($LEFTONSWESTORE) or warn "Couldn't close the left-on-SweStore log: $!\n";
# Final summary of how many requested projects were handled.
print "Cleaning completed:\n\t$counterRemoved projects removed\n\t$counterNotRemoved couldn't be removed\n";