From 04233e41720175153a0eca6244920edefc47e604 Mon Sep 17 00:00:00 2001 From: mmzhang7 <167454772+mmzhang7@users.noreply.github.com> Date: Thu, 6 Jun 2024 23:44:48 +0000 Subject: [PATCH] find_rrids: accept ':' as well as '#' in agsep separator --- scibot/extract.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scibot/extract.py b/scibot/extract.py index 4dea4ff..927b734 100644 --- a/scibot/extract.py +++ b/scibot/extract.py @@ -293,7 +293,7 @@ def find_rrids(text): # second round orblock = '(' + '|'.join(col0(prefixes)) + ')' sep = '(:|_)([ \t]*)' - agsep = '([ \t]*#)([ \t]*)' + agsep = '([ \t]*[#:])([ \t]*)' #agsep = '([ \t]*#)([ \t]*)' agorblock = '(' + '|'.join(col0(agprefixes)) + ')' regex2 = ('(.{0,32})(?:' + orblock + f'{sep}(\d+)|(CVCL){sep}(\w+)|' + agorblock + f'{agsep}(\w+))([^\w].{{0,31}})') # the first 0,32 always greedy matches???