You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
In order to make it work, I had to make some edits: synchronous indexing is too slow, so it is better to send the job to the Celery queue. However, we will then have to rethink the mechanism for detecting job completion.
Also, the index size was 146GB, so the script would fail (it expects 160).
index d9c158e..e908efd 100644
--- a/adsmp/solr_updater.py
+++ b/adsmp/solr_updater.py
@@ -285,7 +285,7 @@ def transform_json_record(db_record):
else:
if target is None:
continue
- print 'field = {}'.format(field)
+ #print 'field = {}'.format(field)
out.update(db_record.get(field))
elif field.startswith('#'):
if callable(target):
diff --git a/run.py b/run.py
index e82b2ed..05cc48d 100755
--- a/run.py
+++ b/run.py
@@ -202,13 +202,13 @@ def rebuild_collection(collection_name):
batch.append(rec.bibcode)
if len(batch) > 1000:
- tasks.task_index_records(batch, force=True, update_solr=True,
+ tasks.task_index_records.delay(batch, force=True, update_solr=True,
update_metrics=False, update_links=False,
ignore_checksums=True, solr_targets=solr_urls)
batch = []
if len(batch) > 0:
- tasks.task_index_records(batch, force=True, update_solr=True,
+ tasks.task_index_records.delay(batch, force=True, update_solr=True,
update_metrics=False, update_links=False,
ignore_checksums=True, solr_targets=solr_urls)
diff --git a/scripts/compare_solrs.py b/scripts/compare_solrs.py
index 953a4e0..b0cda0d 100644
--- a/scripts/compare_solrs.py
+++ b/scripts/compare_solrs.py
@@ -240,7 +240,7 @@ def main():
while True:
line = sys.stdin.readline()
if len(line) == 0:
- break
+ return
bibcode = line.strip()
mismatch = query_and_compare(bibcode,
args.solr_endpoints[0], args.solr_endpoints[1])
diff --git a/scripts/reindex.py b/scripts/reindex.py
index 52c32bd..4201b58 100644
--- a/scripts/reindex.py
+++ b/scripts/reindex.py
@@ -6,13 +6,16 @@ import sys
import pickle
import requests
import time
+
+homedir = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
+if homedir not in sys.path:
+ sys.path.append(homedir)
+print homedir
+
from subprocess import PIPE, Popen
from adsmp import tasks
from adsputils import setup_logging
-homedir = os.path.dirname(os.path.dirname(__file__))
-if homedir not in sys.path:
- sys.path.append(homedir)
app = tasks.app
logger = setup_logging('rebuild')
@@ -69,7 +72,7 @@ def run():
data['start'] = now
write_lockfile(lockfile, data)
- command = 'python run.py --rebuild-collection collection2 >> %s/logs/reindex.log' % (homedir)
+ command = 'python run.py --rebuild-collection --solr-collection collection2 >> %s/logs/reindex.log' % (homedir)
retcode, stdout, stderr = execute(command, cwd=homedir)
if retcode != 0:
@@ -138,4 +141,4 @@ def verify_collection2_size(data):
if __name__ == '__main__':
- run()
\ No newline at end of file
+ run()
The text was updated successfully, but these errors were encountered:
In order to make it work, I had to make some edits: synchronous indexing is too slow, so it is better to send the job to the Celery queue. However, we will then have to rethink the mechanism for detecting job completion.
Also, the index size was 146GB, so the script would fail (it expects 160).
The text was updated successfully, but these errors were encountered: