# autoPan.pl
# Copyright (C) 2005 Edgar Gonzàlez i Pellicer
# Maria Fuentes Fort
#
# This file is part of AutoPan
#
# AutoPan is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
use strict;
use FindBin qw($RealBin);
use lib "$RealBin";
use Getopt::Long;
use EngTok;
use Porter;
use Pyramid;
use Summary;
use WeightedPyramid;
use Writer;
# Usage text for the command line.
# The here-document is double-quoted, so $0 is interpolated with the name
# the script was invoked under; everything else is literal.
my $helpString = <<"END_OF_HELP";
Usage:
$0 [options] <pyramidFile> <summaryFile>
Options:
--lower
--nolower
Lower-case or not words
(Default is yes)
--stop
--nostop
Remove or not stop words
(Default is yes)
--stem
--nostem
Stem words. Implies --lower and --stop.
(Default is yes)
--stop-word-file <file>
Stop word file.
(Default is stopwords/empty.en)
--min-overlap <fraction>
Minimum required overlap.
(Default is 0.9)
--use-contributors
--nouse-contributors
Use or not contributors for overlap.
(Default is yes)
--min-contributor-length <length>
Minimum contributor length
(Default is 2)
--length-weighting
--nolength-weighting
Use or not length weighting.
(Default is yes)
--greedy-alignment
--nogreedy-alignment
Use or not a greedy alignment strategy.
(Default is not)
--output-format=pln
--output-format=pan
Output format is plain (pln) or XML (pan)
(Default is pln)
--help
Show this help
END_OF_HELP
# Option defaults. The negatable switches start at the defaults advertised
# in the usage text; $greedyAlignment and $help start undefined (off).
my $lower = 1;
my $stop = 1;
my $stem = 1;
# NOTE(review): relative path; whether it is resolved against the current
# directory or the script directory depends on Porter - confirm.
my $stopWordFile = 'stopwords/empty.en';
my $minOverlap = 0.9;
my $useContributors = 1;
my $minContriLength = 2;
my $lengthWeighting = 1;
my $greedyAlignment;
my $outputFormat = 'pln';
my $help;

# Parse the command line. GetOptions returns false when it met an unknown or
# malformed option (it has already reported the problem on STDERR by then).
# NOTE(review): the usage text says --stem implies --lower and --stop; nothing
# here enforces that, presumably the consumers of these flags do - confirm.
my $optionsOk = GetOptions(
    'lower!'                   => \$lower,
    'stop!'                    => \$stop,
    'stem!'                    => \$stem,
    'stop-word-file=s'         => \$stopWordFile,
    'min-overlap=f'            => \$minOverlap,
    'use-contributors!'        => \$useContributors,
    'min-contributor-length=i' => \$minContriLength,
    'length-weighting!'        => \$lengthWeighting,
    'greedy-alignment!'        => \$greedyAlignment,
    'output-format=s'          => \$outputFormat,
    'help'                     => \$help,
);

# An explicit --help request is not an error: show the usage on STDOUT and
# exit successfully, so the text can be piped or paged and callers do not
# see a failure status.
if ($optionsOk && $help) {
    print $helpString;
    exit(0);
}

# Anything else that is wrong (bad option, or not exactly the two positional
# arguments) is a usage error: usage on STDERR, non-zero exit status.
die $helpString if !$optionsOk || @ARGV != 2;
# Positional parameters: the pyramid file first, then the summary file.
my ($pyramidFile, $summaryFile) = @ARGV;

# Normalise the output format to lower case and validate it.
# \A...\z is used rather than ^...$ because $ also matches just before a
# trailing newline: "pan\n" would be accepted here and then fail the later
# `eq 'pan'` comparison, silently selecting the plain writer.
$outputFormat = lc $outputFormat;
die $helpString unless $outputFormat =~ /\A(?:pln|pan)\z/;
# Create the stemmer (given the stop word file) and the tokenizer.
# Constructors are called as Class->new(...) instead of the indirect-object
# form `new Class(...)`, whose parse depends on what perl already knows about
# the names involved.
my $stemmer   = Porter->new($stopWordFile);
my $tokenizer = EngTok->new();

# Create the writer for the requested output format: 'pan' selects the XML
# writer, any other value the plain one.
my $writer = $outputFormat eq 'pan' ? XMLWriter->new() : PlainWriter->new();

# Settings and helper objects handed to the pyramid and summary constructors.
my %options = (
    'lower'     => $lower,
    'stop'      => $stop,
    'stem'      => $stem,
    'contrib'   => $useContributors,
    'mincontri' => $minContriLength,
    'stemmer'   => $stemmer,
    'tokenizer' => $tokenizer,
    'writer'    => $writer,
);
# Main sequence. The constructors below receive the writer through %options,
# so the order of the steps is kept exactly as it was.

# Write the header
$writer->printHeader();

# Load the pyramid; the length-weighted variant is used unless length
# weighting was switched off. Class->new(...) replaces the indirect-object
# form `new Class(...)`.
my $pyramidClass = $lengthWeighting ? 'WeightedPyramid' : 'Pyramid';
my $pyramid      = $pyramidClass->new($pyramidFile, %options);

# Write the middle sequence
$writer->printMiddle();

# Load the summary and print it
my $summary = Summary->new($summaryFile, %options);
$writer->printSummary($summary);

# Align the summary against the pyramid. Both strategies take the same
# arguments, so only the method name differs.
my $alignMethod = $greedyAlignment ? 'greedyAlignSummary' : 'alignSummary';
my $alignment   = $pyramid->$alignMethod($summary, $minOverlap, $useContributors);

# Print the alignment, then the footer
$writer->printAlignment($alignment, $summary, $pyramid);
$writer->printFooter();

# That's all
exit(0);