Skip to content

Commit

Permalink
Speed up datacard parsing
Browse files Browse the repository at this point in the history
Two regular expressions were applied to all lines in the card, both of which
were slower than doing the equivalent transformation by hand in Python.

 - `\\s*#.*` removes comments and whitespace at the end of the line
 - `(?<=\\s)-+(\\s|$)` converts any ` -` entries to ` 0`

The latter was applied universally, whereas now it is only checked for
parameter effect size arguments. This means that the interpretation of
arguments for nuisances such as `gmN` is now more restricted.
  • Loading branch information
nsmith- committed Aug 23, 2022
1 parent da84905 commit 840d8e0
Showing 1 changed file with 13 additions and 3 deletions.
16 changes: 13 additions & 3 deletions python/DatacardParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,14 @@ def addDatacardParserOptions(parser):
)


def strip(l):
    """Strip a trailing comment and trailing whitespace from a line.

    Everything from the first "#" onward is discarded, then trailing
    whitespace (including any newline) is removed. Leading whitespace is
    left untouched.

    l: the line of text to clean.
    Returns the cleaned line; a line consisting only of a comment yields "".
    """
    idx = l.find("#")
    # str.find returns -1 when "#" is absent; index 0 is a real match (the
    # whole line is a comment) and must be stripped as well.
    if idx >= 0:
        return l[:idx].rstrip()
    return l.rstrip()


def isVetoed(name, vetoList):
for pattern in vetoList:
if not pattern:
Expand Down Expand Up @@ -453,10 +461,10 @@ def parseCard(file, options):
break # rate is the last line before nuisances
# parse nuisances
for lineNumber2, l in enumerate(file):
if l.startswith("--"):
if l.startswith("--") or l.startswith("#"):
continue
l = re.sub("\\s*#.*", "", l)
l = re.sub("(?<=\\s)-+(\\s|$)", " 0\\1", l)

l = strip(l)
f = l.split()
if len(f) <= 1:
continue
Expand Down Expand Up @@ -623,6 +631,8 @@ def parseCard(file, options):
if v <= 0.00:
raise ValueError('Found "%s" in the nuisances affecting %s for %s. This would lead to NANs later on, so please fix it.' % (r, p, b))
else:
if r == "-" * len(r):
r = 0.0
errline[b][p] = float(r)
# values of 0.0 are treated as 1.0; scrap negative values.
if pdf not in ["trG", "dFD", "dFD2"] and errline[b][p] < 0:
Expand Down

0 comments on commit 840d8e0

Please sign in to comment.