diff --git a/cron/mirror.php b/cron/mirror.php deleted file mode 100644 index 133914d..0000000 --- a/cron/mirror.php +++ /dev/null @@ -1,127 +0,0 @@ -= now()) AND url IS NOT NULL - ORDER BY RAND() ASC'; -$result = mysql_query($sql); -while ($tmp = mysql_fetch_array($result)) -{ - $legislators[$tmp{shortname}] = $tmp['url']; -} - -# Compare the array of directory names to the array of known legislators, generating a list of all -# legislators of whom we have no record. -$missing = array_diff_key($legislators, $directories); - -# Then compare in the opposite direction, generating a list of all directories that are for -# legislators not found in the our list -- that is, retired legislators. -$retired = array_keys(array_diff_key($directories, $legislators)); - -# Iterate through the list of retired legislators and remove each of their directories. -foreach ($retired as $remove) -{ - unset($directories[$remove]); -} - - -# If we do have missing legislators (that is, a legislator about whom we know, and who has a -# website, but we don't have a copy of their website), then we want to grab a copy of one of their -# sites. We don't want to do this every time, because it will block us from updating existing sites -# if we can't retrieve this site, so we just do it 5% of the time. -if ( (count($missing) > 0) && (rand(1,20) == 1) ) -{ - $legislator = key($missing); -} - -# Since there is no legislator in the database whose site we don't have a copy of, we can just use -# a random directory name at the top of the stack (that is, the oldest ones) as our legislator to be -# updated, though we iterate through to make sure it's a legislator that's still in office. Note -# that we just don't take the one on the top of the stack because if that site couldn't be retrieved -# for some reason, we'd be blocking all future updates of all sites. -else -{ - - # Slice off the top five directories from the stack. 
# Remainder of the "no missing legislator" branch, followed by the mirroring step.
	# Keep only the first ten directories of the stack as the candidate pool. (The window is ten
	# entries wide; earlier comments describing it as "five" are out of date.)
	$directories = array_slice($directories, 0, 10);

	# Shuffle the candidates while keeping each directory name attached to its value: shuffle the
	# keys, flip them into an array in the new order, then merge the original values back over it.
	$keys = array_keys($directories);
	shuffle($keys);
	$directories = array_merge(array_flip($keys), $directories);

	# Step through the randomly ordered candidates and take the first one that is still a known
	# legislator.
	foreach ($directories as $directory => $blah)
	{
		if (isset($legislators[$directory]))
		{
			$legislator = $directory;
			break;
		}
	}
}

# If nothing could be selected (e.g. there are no candidate directories at all), stop here rather
# than invoking wget with an empty URL and a directory prefix of "/<date>/".
if (!isset($legislator) || !isset($legislators[$legislator]))
{
	echo 'No legislator available to mirror.';
	return;
}

# Retrieve this legislator's URL from the list built from the database.
$url = $legislators[$legislator];

# The mirror is written relative to the working directory, so refuse to run if we cannot switch to
# the mirror root.
if (chdir($dir) === false)
{
	echo 'Could not change to the mirror directory.';
	return;
}

# Execute the actual mirroring, allowing up to 300 seconds for this to run. Both the directory
# prefix and the URL are passed through escapeshellarg() so that characters such as "&", ";" or "?"
# in a URL are handed to wget verbatim instead of being interpreted by the shell.
$cmd = '/vol/www/richmondsunlight.com/alarmlimit 300 wget -a log.txt --mirror --html-extension'
	. ' --directory-prefix=' . escapeshellarg($legislator . '/' . date('Ymd') . '/')
	. ' --no-host-directories --convert-links ' . escapeshellarg($url);
echo $cmd;
exec($cmd, $output);
echo 'Link';