diff --git a/Dockerfile.delft b/Dockerfile.delft index cf5942e0eb..d33e4ffa80 100644 --- a/Dockerfile.delft +++ b/Dockerfile.delft @@ -142,7 +142,7 @@ RUN python3 preload_embeddings.py --registry ./resources-registry.json && \ RUN mkdir delft && \ cp ./resources-registry.json delft/ -ENV GROBID_SERVICE_OPTS "--add-opens java.base/java.lang=ALL-UNNAMED" +ENV GROBID_SERVICE_OPTS "--add-opens java.base/java.lang=ALL-UNNAMED --add-opens java.base/sun.nio.ch=ALL-UNNAMED --add-opens java.base/java.io=ALL-UNNAMED" CMD ["./grobid-service/bin/grobid-service"] diff --git a/grobid-core/src/main/java/org/grobid/core/lexicon/Lexicon.java b/grobid-core/src/main/java/org/grobid/core/lexicon/Lexicon.java index b24ad47d07..1b017c8a1f 100755 --- a/grobid-core/src/main/java/org/grobid/core/lexicon/Lexicon.java +++ b/grobid-core/src/main/java/org/grobid/core/lexicon/Lexicon.java @@ -16,6 +16,7 @@ import java.util.Set; import java.util.StringTokenizer; import java.util.regex.*; +import java.util.stream.Collectors; import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; @@ -36,6 +37,7 @@ import org.grobid.core.utilities.Utilities; import org.grobid.core.utilities.TextUtilities; import org.grobid.core.analyzers.GrobidAnalyzer; +import org.jetbrains.annotations.Nullable; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -43,10 +45,9 @@ /** * Class for managing all the lexical resources. - * */ public class Lexicon { - private static final Logger LOGGER = LoggerFactory.getLogger(Lexicon.class); + private static final Logger LOGGER = LoggerFactory.getLogger(Lexicon.class); // private static volatile Boolean instanceController = false; private static volatile Lexicon instance; @@ -59,7 +60,7 @@ public class Lexicon { private Set countries = null; // retrieve basic naming information about a research infrastructure (key must be lower case!) - private Map > researchOrganizations = null; + private Map> researchOrganizations = null; // fast matchers for efficient and flexible pattern matching in layout token sequence or strings private FastMatcher abbrevJournalPattern = null; @@ -67,21 +68,21 @@ public class Lexicon { private FastMatcher publisherPattern = null; private FastMatcher journalPattern = null; private FastMatcher cityPattern = null; - private FastMatcher organisationPattern = null; + private FastMatcher organisationPattern = null; private FastMatcher researchInfrastructurePattern = null; - private FastMatcher locationPattern = null; + private FastMatcher locationPattern = null; private FastMatcher countryPattern = null; - private FastMatcher orgFormPattern = null; + private FastMatcher orgFormPattern = null; private FastMatcher collaborationPattern = null; private FastMatcher funderPattern = null; private FastMatcher personTitlePattern = null; - private FastMatcher personSuffixPattern = null; + private FastMatcher personSuffixPattern = null; public static Lexicon getInstance() { if (instance == null) { synchronized (Lexicon.class) { if (instance == null) { - getNewInstance(); + getNewInstance(); } } } @@ -91,11 +92,11 @@ public static Lexicon getInstance() { /** * Creates a new instance. 
*/ - private static synchronized void getNewInstance() { - LOGGER.debug("Get new instance of Lexicon"); - GrobidProperties.getInstance(); - instance = new Lexicon(); - } + private static synchronized void getNewInstance() { + LOGGER.debug("Get new instance of Lexicon"); + GrobidProperties.getInstance(); + instance = new Lexicon(); + } /** * Hidden constructor @@ -103,24 +104,24 @@ private static synchronized void getNewInstance() { private Lexicon() { initDictionary(); initNames(); - // the loading of the journal and conference names is lazy + // the loading of the journal and conference names is lazy addDictionary(GrobidProperties.getGrobidHomePath() + File.separator + - "lexicon"+File.separator+"wordforms"+File.separator+"english.wf", Language.EN); + "lexicon" + File.separator + "wordforms" + File.separator + "english.wf", Language.EN); addDictionary(GrobidProperties.getGrobidHomePath() + File.separator + - "lexicon"+File.separator+"wordforms"+File.separator+"german.wf", Language.EN); + "lexicon" + File.separator + "wordforms" + File.separator + "german.wf", Language.EN); + addLastNames(GrobidProperties.getGrobidHomePath() + File.separator + + "lexicon" + File.separator + "names" + File.separator + "names.family"); addLastNames(GrobidProperties.getGrobidHomePath() + File.separator + - "lexicon"+File.separator+"names"+File.separator+"names.family"); - addLastNames(GrobidProperties.getGrobidHomePath() + File.separator + - "lexicon"+File.separator+"names"+File.separator+"lastname.5k"); + "lexicon" + File.separator + "names" + File.separator + "lastname.5k"); + addFirstNames(GrobidProperties.getGrobidHomePath() + File.separator + + "lexicon" + File.separator + "names" + File.separator + "names.female"); addFirstNames(GrobidProperties.getGrobidHomePath() + File.separator + - "lexicon"+File.separator+"names"+File.separator+"names.female"); + "lexicon" + File.separator + "names" + File.separator + "names.male"); addFirstNames(GrobidProperties.getGrobidHomePath() + File.separator + - "lexicon"+File.separator+"names"+File.separator+"names.male"); - addFirstNames(GrobidProperties.getGrobidHomePath() + File.separator + - "lexicon"+File.separator+"names"+File.separator+"firstname.5k"); + "lexicon" + File.separator + "names" + File.separator + "firstname.5k"); initCountryCodes(); addCountryCodes(GrobidProperties.getGrobidHomePath() + File.separator + - "lexicon"+File.separator+"countries"+File.separator+"CountryCodes.xml"); + "lexicon" + File.separator + "countries" + File.separator + "CountryCodes.xml"); } /** @@ -139,7 +140,7 @@ public OrganizationRecord(String name, String fullName, String lang) { } private void initDictionary() { - LOGGER.info("Initiating dictionary"); + LOGGER.info("Initiating dictionary"); dictionary_en = new HashSet<>(); dictionary_de = new HashSet<>(); LOGGER.info("End of Initialization of dictionary"); @@ -149,11 +150,11 @@ public final void addDictionary(String path, String lang) { File file = new File(path); if (!file.exists()) { throw new GrobidResourceException("Cannot add entries to dictionary (language '" + lang + - "'), because file '" + file.getAbsolutePath() + "' does not exists."); + "'), because file '" + file.getAbsolutePath() + "' does not exists."); } if (!file.canRead()) { throw new GrobidResourceException("Cannot add entries to dictionary (language '" + lang + - "'), because cannot read file '" + file.getAbsolutePath() + "'."); + "'), because cannot read file '" + file.getAbsolutePath() + "'."); } InputStream ist = null; InputStreamReader isr = null; @@ 
-202,14 +203,14 @@ public boolean isCountry(String tok) { } private void initNames() { - LOGGER.info("Initiating names"); + LOGGER.info("Initiating names"); firstNames = new HashSet(); lastNames = new HashSet(); LOGGER.info("End of initialization of names"); } private void initCountryCodes() { - LOGGER.info("Initiating country codes"); + LOGGER.info("Initiating country codes"); countryCodes = new HashMap(); countries = new HashSet(); countryPattern = new FastMatcher(); @@ -220,11 +221,11 @@ private void addCountryCodes(String path) { File file = new File(path); if (!file.exists()) { throw new GrobidResourceException("Cannot add country codes to dictionary, because file '" + - file.getAbsolutePath() + "' does not exists."); + file.getAbsolutePath() + "' does not exists."); } if (!file.canRead()) { throw new GrobidResourceException("Cannot add country codes to dictionary, because cannot read file '" + - file.getAbsolutePath() + "'."); + file.getAbsolutePath() + "'."); } InputStream ist = null; //InputStreamReader isr = null; @@ -262,7 +263,7 @@ public void initCountryPatterns() { if (countries == null || countries.size() == 0) { // it should never be the case addCountryCodes(GrobidProperties.getGrobidHomePath() + File.separator + - "lexicon"+File.separator+"countries"+File.separator+"CountryCodes.xml"); + "lexicon" + File.separator + "countries" + File.separator + "CountryCodes.xml"); } for (String country : countries) { @@ -274,11 +275,11 @@ public final void addFirstNames(String path) { File file = new File(path); if (!file.exists()) { throw new GrobidResourceException("Cannot add first names to dictionary, because file '" + - file.getAbsolutePath() + "' does not exists."); + file.getAbsolutePath() + "' does not exists."); } if (!file.canRead()) { throw new GrobidResourceException("Cannot add first names to dictionary, because cannot read file '" + - file.getAbsolutePath() + "'."); + file.getAbsolutePath() + "'."); } InputStream ist = null; BufferedReader dis = null; @@ -318,11 +319,11 @@ public final void addLastNames(String path) { File file = new File(path); if (!file.exists()) { throw new GrobidResourceException("Cannot add last names to dictionary, because file '" + - file.getAbsolutePath() + "' does not exists."); + file.getAbsolutePath() + "' does not exists."); } if (!file.canRead()) { throw new GrobidResourceException("Cannot add last names to dictionary, because cannot read file '" + - file.getAbsolutePath() + "'."); + file.getAbsolutePath() + "'."); } InputStream ist = null; BufferedReader dis = null; @@ -360,6 +361,7 @@ public final void addLastNames(String path) { /** * Lexical look-up, default is English + * * @param s a string to test * @return true if in the dictionary */ @@ -415,13 +417,13 @@ public boolean inDictionary(String s, String lang) { public void initJournals() { try { abbrevJournalPattern = new FastMatcher(new - File(GrobidProperties.getGrobidHomePath() + "/lexicon/journals/abbrev_journals.txt")); + File(GrobidProperties.getGrobidHomePath() + "/lexicon/journals/abbrev_journals.txt")); journalPattern = new FastMatcher(new - File(GrobidProperties.getGrobidHomePath() + "/lexicon/journals/journals.txt")); + File(GrobidProperties.getGrobidHomePath() + "/lexicon/journals/journals.txt")); } catch (PatternSyntaxException e) { throw new GrobidResourceException( - "Error when compiling lexicon matcher for abbreviated journal names.", e); + "Error when compiling lexicon matcher for abbreviated journal names.", e); } } @@ -429,7 +431,7 @@ public void initConferences() { // 
ArrayList conferences = new ArrayList(); try { conferencePattern = new FastMatcher(new - File(GrobidProperties.getGrobidHomePath() + "/lexicon/journals/proceedings.txt")); + File(GrobidProperties.getGrobidHomePath() + "/lexicon/journals/proceedings.txt")); } catch (PatternSyntaxException e) { throw new GrobidResourceException("Error when compiling lexicon matcher for conference names.", e); } @@ -438,7 +440,7 @@ public void initConferences() { public void initPublishers() { try { publisherPattern = new FastMatcher(new - File(GrobidProperties.getGrobidHomePath() + "/lexicon/publishers/publishers.txt")); + File(GrobidProperties.getGrobidHomePath() + "/lexicon/publishers/publishers.txt")); } catch (PatternSyntaxException e) { throw new GrobidResourceException("Error when compiling lexicon matcher for conference names.", e); } @@ -447,7 +449,7 @@ public void initPublishers() { public void initCities() { try { cityPattern = new FastMatcher(new - File(GrobidProperties.getGrobidHomePath() + "/lexicon/places/cities15000.txt")); + File(GrobidProperties.getGrobidHomePath() + "/lexicon/places/cities15000.txt")); } catch (PatternSyntaxException e) { throw new GrobidResourceException("Error when compiling lexicon matcher for cities.", e); } @@ -458,56 +460,56 @@ public void initCollaborations() { //collaborationPattern = new FastMatcher(new // File(GrobidProperties.getGrobidHomePath() + "/lexicon/organisations/collaborations.txt")); collaborationPattern = new FastMatcher(new - File(GrobidProperties.getGrobidHomePath() + "/lexicon/organisations/inspire_collaborations.txt")); + File(GrobidProperties.getGrobidHomePath() + "/lexicon/organisations/inspire_collaborations.txt")); } catch (PatternSyntaxException e) { throw new GrobidResourceException("Error when compiling lexicon matcher for collaborations.", e); } } - public void initOrganisations() { + public void initOrganisations() { try { organisationPattern = new FastMatcher(new - File(GrobidProperties.getGrobidHomePath() + "/lexicon/organisations/WikiOrganizations.lst")); - organisationPattern.loadTerms(new File(GrobidProperties.getGrobidHomePath() + - "/lexicon/organisations/government.government_agency")); - organisationPattern.loadTerms(new File(GrobidProperties.getGrobidHomePath() + - "/lexicon/organisations/known_corporations.lst")); - organisationPattern.loadTerms(new File(GrobidProperties.getGrobidHomePath() + - "/lexicon/organisations/venture_capital.venture_funded_company")); + File(GrobidProperties.getGrobidHomePath() + "/lexicon/organisations/WikiOrganizations.lst")); + organisationPattern.loadTerms(new File(GrobidProperties.getGrobidHomePath() + + "/lexicon/organisations/government.government_agency")); + organisationPattern.loadTerms(new File(GrobidProperties.getGrobidHomePath() + + "/lexicon/organisations/known_corporations.lst")); + organisationPattern.loadTerms(new File(GrobidProperties.getGrobidHomePath() + + "/lexicon/organisations/venture_capital.venture_funded_company")); } catch (PatternSyntaxException e) { throw new GrobidResourceException("Error when compiling lexicon matcher for organisations.", e); } catch (IOException e) { throw new GrobidResourceException("Cannot add term to matcher, because the lexicon resource file " + - "does not exist or cannot be read.", e); + "does not exist or cannot be read.", e); } catch (Exception e) { - throw new GrobidException("An exception occured while running Grobid Lexicon init.", e); - } + throw new GrobidException("An exception occured while running Grobid Lexicon init.", e); + } } - public 
void initOrgForms() { + public void initOrgForms() { try { - orgFormPattern = new FastMatcher(new - File(GrobidProperties.getGrobidHomePath() + "/lexicon/organisations/orgClosings.txt")); + orgFormPattern = new FastMatcher(new + File(GrobidProperties.getGrobidHomePath() + "/lexicon/organisations/orgClosings.txt")); } catch (PatternSyntaxException e) { throw new GrobidResourceException("Error when compiling lexicon matcher for organisations.", e); } catch (Exception e) { - throw new GrobidException("An exception occured while running Grobid Lexicon init.", e); - } + throw new GrobidException("An exception occured while running Grobid Lexicon init.", e); + } } - public void initLocations() { + public void initLocations() { try { locationPattern = new FastMatcher(new - File(GrobidProperties.getGrobidHomePath() + "/lexicon/places/location.txt")); + File(GrobidProperties.getGrobidHomePath() + "/lexicon/places/location.txt")); } catch (PatternSyntaxException e) { throw new GrobidResourceException("Error when compiling lexicon matcher for locations.", e); } } - public void initPersonTitles() { + public void initPersonTitles() { try { personTitlePattern = new FastMatcher(new - File(GrobidProperties.getGrobidHomePath() + "/lexicon/names/VincentNgPeopleTitles.txt")); + File(GrobidProperties.getGrobidHomePath() + "/lexicon/names/VincentNgPeopleTitles.txt")); } catch (PatternSyntaxException e) { throw new GrobidResourceException("Error when compiling lexicon matcher for person titles.", e); } @@ -516,7 +518,7 @@ public void initPersonTitles() { public void initPersonSuffix() { try { personSuffixPattern = new FastMatcher(new - File(GrobidProperties.getGrobidHomePath() + "/lexicon/names/suffix.txt")); + File(GrobidProperties.getGrobidHomePath() + "/lexicon/names/suffix.txt")); } catch (PatternSyntaxException e) { throw new GrobidResourceException("Error when compiling lexicon matcher for person name suffix.", e); } @@ -525,8 +527,8 @@ public void initPersonSuffix() { public void initFunders() { try { funderPattern = new FastMatcher(new - File(GrobidProperties.getGrobidHomePath() + "/lexicon/organisations/funders.txt"), - GrobidAnalyzer.getInstance(), true); + File(GrobidProperties.getGrobidHomePath() + "/lexicon/organisations/funders.txt"), + GrobidAnalyzer.getInstance(), true); } catch (PatternSyntaxException e) { throw new GrobidResourceException("Error when compiling lexicon matcher for funders.", e); } catch (Exception e) { @@ -537,19 +539,19 @@ public void initFunders() { public void initResearchInfrastructures() { try { researchInfrastructurePattern = new FastMatcher(new - File(GrobidProperties.getGrobidHomePath() + "/lexicon/organisations/research_infrastructures.txt"), - GrobidAnalyzer.getInstance(), true); + File(GrobidProperties.getGrobidHomePath() + "/lexicon/organisations/research_infrastructures.txt"), + GrobidAnalyzer.getInstance(), true); // store some name mapping researchOrganizations = new TreeMap<>(); File file = new File(GrobidProperties.getGrobidHomePath() + "/lexicon/organisations/research_infrastructures_map.txt"); if (!file.exists()) { throw new GrobidResourceException("Cannot add research infrastructure names to dictionary, because file '" + - file.getAbsolutePath() + "' does not exists."); + file.getAbsolutePath() + "' does not exists."); } if (!file.canRead()) { throw new GrobidResourceException("Cannot add research infrastructure to dictionary, because cannot read file '" + - file.getAbsolutePath() + "'."); + file.getAbsolutePath() + "'."); } InputStream ist = null; 
BufferedReader dis = null; @@ -651,7 +653,7 @@ public List getOrganizationNamingInfo(String name) { /** * Map the language codes used by the language identifier component to the normal * language name. - * + *

* Note: due to an older bug, kr is currently mapped to Korean too - this should * disappear at some point in the future after retraining of the models * @@ -847,7 +849,7 @@ public List tokenPositionsCityNames(List s) { /** Organisation names **/ - /** + /** * Soft look-up in organisation name gazetteer for a given string with token positions */ public List tokenPositionsOrganisationNames(String s) { @@ -913,7 +915,7 @@ public List charPositionsOrganisationNames(List s) return results; } - /** + /** * Soft look-up in organisation form name gazetteer for a given string with token positions */ public List tokenPositionsOrgForm(String s) { @@ -992,7 +994,7 @@ public List tokenPositionsLocationNames(List s) { /** * Soft look-up in location name gazetteer for a string, return a list of positions referring * to the character positions within the string. - * +

* For example "The car is in Milan" as Milan is a location, would return OffsetPosition(14,19) * * @param s the input string @@ -1009,7 +1011,7 @@ public List charPositionsLocationNames(String s) { /** * Soft look-up in location name gazetteer for a list of LayoutToken, return a list of * positions referring to the character positions in the input sequence. - * + *

* For example "The car is in Milan" as Milan is a location, would return OffsetPosition(14,19) * * @param s the input list of LayoutToken @@ -1023,7 +1025,7 @@ public List charPositionsLocationNames(List s) { return results; } - /** + /** * Soft look-up in person title gazetteer for a given string with token positions */ public List tokenPositionsPersonTitle(String s) { @@ -1185,7 +1187,7 @@ public static List characterPositionsUrlPattern(List * This will produce better quality recognized URL, avoiding missing suffixes and problems * with break lines and spaces. **/ @@ -1226,8 +1228,8 @@ public static OffsetPosition getTokenPositions(int startPos, int endPos, List urlTokens = new ArrayList<>(); int tokenPos = 0; int tokenIndex = 0; - for(LayoutToken localToken : layoutTokens) { - if (startPos <= tokenPos && (tokenPos+localToken.getText().length() <= endPos) ) { + for (LayoutToken localToken : layoutTokens) { + if (startPos <= tokenPos && (tokenPos + localToken.getText().length() <= endPos)) { urlTokens.add(localToken); if (startTokenIndex == -1) startTokenIndex = tokenIndex; @@ -1249,14 +1251,14 @@ public static OffsetPosition getTokenPositions(int startPos, int endPos, List characterPositionsUrlPatternWithPdfAnnotations( - List layoutTokens, - List pdfAnnotations) { + List layoutTokens, + List pdfAnnotations) { List urlPositions = Lexicon.characterPositionsUrlPattern(layoutTokens); List resultPositions = new ArrayList<>(); // Do we need to extend the url position based on additional position of the corresponding // PDF annotation? - for(OffsetPosition urlPosition : urlPositions) { + for (OffsetPosition urlPosition : urlPositions) { int startPos = urlPosition.start; int endPos = urlPosition.end; @@ -1272,7 +1274,7 @@ public static List characterPositionsUrlPatternWithPdfAnnotation continue; } - List urlTokens = new ArrayList<>(layoutTokens.subList(startTokenIndex, endTokensIndex+1)); + List urlTokens = new ArrayList<>(layoutTokens.subList(startTokenIndex, endTokensIndex + 1)); String urlString = LayoutTokensUtil.toText(urlTokens); @@ -1282,11 +1284,8 @@ public static List characterPositionsUrlPatternWithPdfAnnotation if (CollectionUtils.isNotEmpty(urlTokens)) { LayoutToken lastToken = urlTokens.get(urlTokens.size() - 1); if (pdfAnnotations != null) { - targetAnnotation = pdfAnnotations.stream() - .filter(pdfAnnotation -> - pdfAnnotation.getType() != null && pdfAnnotation.getType() == PDFAnnotation.Type.URI && pdfAnnotation.cover(lastToken)) - .findFirst() - .orElse(null); + targetAnnotation = matchPdfAnnotationsBasedOnCoordinatesDestinationOrLastTokens(pdfAnnotations, urlTokens); + correctedLastTokenIndex = urlTokens.size() - 1; // If we cannot match, maybe the regex got some characters too much, e.g. dots, parenthesis,etc.. 
@@ -1296,14 +1295,10 @@ public static List characterPositionsUrlPatternWithPdfAnnotation String lastTokenText = lastToken.getText(); int index = urlTokens.size() - 1; // The error should be within a few characters, so we stop if the token length is greater than 1 - while(index > 0 && lastTokenText.length() == 1 && !Character.isLetterOrDigit(lastTokenText.charAt(0)) && targetAnnotation==null) { + while (index > 0 && lastTokenText.length() == 1 && !Character.isLetterOrDigit(lastTokenText.charAt(0)) && targetAnnotation == null) { index -= 1; LayoutToken finalLastToken1 = urlTokens.get(index); - targetAnnotation = pdfAnnotations.stream() - .filter(pdfAnnotation -> - pdfAnnotation.getType() != null && pdfAnnotation.getType() == PDFAnnotation.Type.URI && pdfAnnotation.cover(finalLastToken1)) - .findFirst() - .orElse(null); + targetAnnotation = matchPdfAnnotationsBasedOnCoordinatesDestinationOrLastTokens(pdfAnnotations, urlTokens); correctedLastTokenIndex = index; } @@ -1315,7 +1310,13 @@ public static List characterPositionsUrlPatternWithPdfAnnotation String destination = targetAnnotation.getDestination(); int destinationPos = 0; - if (destination.contains(urlString)) { + if (urlString.replaceAll("\\s", "").equals(destination)) { + // Nothing to do here, we ignore the correctedLastTokenIndex because the regex got everything we need + } else if ( + destination.contains(urlString) + || destination.contains(urlString.replaceAll("\\s", "")) + || destination.contains(StringUtils.stripEnd(urlString, "-")) + ) { // In this case the regex did not catch all the URL, so we need to extend it using the // destination URL from the annotation destinationPos = destination.indexOf(urlString) + urlString.length(); @@ -1327,7 +1328,7 @@ public static List characterPositionsUrlPatternWithPdfAnnotation if ("\n".equals(nextToken.getText()) || " ".equals(nextToken.getText()) || - nextToken.getText().length() == 0) { + nextToken.getText().isEmpty()) { endPos += nextToken.getText().length(); additionalSpaces += nextToken.getText().length(); additionalTokens += 1; @@ -1355,8 +1356,6 @@ public static List characterPositionsUrlPatternWithPdfAnnotation endPos -= additionalSpaces; } } - } else if (urlString.replaceAll("\\s", "").equals(destination)) { - // Nothing to do here, we ignore the correctedLastTokenIndex because the regex got everything we need } else if (urlString.contains(destination) || urlString.replaceAll("\\s", "").contains(destination)) { // In this case the regex has caught too much, usually this should be limited to a few characters, // but we cannot know it for sure.
Here we first find the difference between the destination and the @@ -1406,6 +1405,63 @@ public static List characterPositionsUrlPatternWithPdfAnnotation return resultPositions; } + @Nullable + private static PDFAnnotation matchPdfAnnotationsBasedOnCoordinatesDestinationOrLastTokens(List pdfAnnotations, List urlTokens) { + LayoutToken lastToken = urlTokens.get(urlTokens.size() - 1); + String urlString = LayoutTokensUtil.toText(urlTokens); + + List possibleTargetAnnotations = pdfAnnotations.stream() + .filter(pdfAnnotation -> + pdfAnnotation.getType() != null + && pdfAnnotation.getType() == PDFAnnotation.Type.URI + && pdfAnnotation.cover(lastToken) + ).collect(Collectors.toList()); + + PDFAnnotation targetAnnotation; + if (possibleTargetAnnotations.size() > 1) { + possibleTargetAnnotations = possibleTargetAnnotations.stream() + .filter(pdfAnnotation -> + pdfAnnotation.getDestination().contains(urlString) + ) + .collect(Collectors.toList()); + + if (possibleTargetAnnotations.size() > 1) { + // If the lastToken is any of ./:_ we should add the token before + int index = urlTokens.size() - 1; + if (urlTokens.size() > 1 && lastToken.getText().matches("[.:_\\-/]")) { + index -= 1; + } + + while (index > 0 && possibleTargetAnnotations.size() > 1) { + final String lastTokenText2 = LayoutTokensUtil.toText(urlTokens.subList(index - 1, urlTokens.size())); + + possibleTargetAnnotations = possibleTargetAnnotations.stream() + .filter(pdfAnnotation -> + pdfAnnotation.getDestination().contains(lastTokenText2) + ) + .collect(Collectors.toList()); + index--; + } + + targetAnnotation = possibleTargetAnnotations.stream() + .findFirst() + .orElse(null); + + } else { + targetAnnotation = possibleTargetAnnotations.stream() + .findFirst() + .orElse(null); + } + + } else { + targetAnnotation = possibleTargetAnnotations.stream() + .findFirst() + .orElse(null); + } + + return targetAnnotation; + } + /** * Identify in tokenized input the positions of an email address pattern with token positions diff --git a/grobid-core/src/test/java/org/grobid/core/lexicon/LexiconTest.java b/grobid-core/src/test/java/org/grobid/core/lexicon/LexiconTest.java index 4436cbd979..8672a11801 100644 --- a/grobid-core/src/test/java/org/grobid/core/lexicon/LexiconTest.java +++ b/grobid-core/src/test/java/org/grobid/core/lexicon/LexiconTest.java @@ -409,7 +409,7 @@ public void testCharacterPositionsUrlPatternWithPDFAnnotations_URL_shouldReturnC PDFAnnotation annotation1 = new PDFAnnotation(); annotation1.setPageNumber(10); List boundingBoxes = new ArrayList<>(); - boundingBoxes.add(BoundingBox.fromPointAndDimensions(10, 378.093, 625.354, 167.51799999999997, 10.599999999999909)); + boundingBoxes.add(BoundingBox.fromPointAndDimensions(10, 378.093, 625.354, 167.51799999999997, 10.599999999999909)); annotation1.setBoundingBoxes(boundingBoxes); annotation1.setDestination("https://github.com/shijuanchen/shift_cult"); annotation1.setType(PDFAnnotation.Type.URI); @@ -417,7 +417,7 @@ public void testCharacterPositionsUrlPatternWithPDFAnnotations_URL_shouldReturnC PDFAnnotation annotation2 = new PDFAnnotation(); annotation2.setPageNumber(10); List boundingBoxes2 = new ArrayList<>(); - boundingBoxes2.add(BoundingBox.fromPointAndDimensions(10, 475.497, 637.854, 77.26,10.60)); + boundingBoxes2.add(BoundingBox.fromPointAndDimensions(10, 475.497, 637.854, 77.26, 10.60)); annotation2.setBoundingBoxes(boundingBoxes2); annotation2.setDestination("https://sites.google.com/view/shijuanchen/research/shift_cult"); annotation2.setType(PDFAnnotation.Type.URI); @@ 
-432,6 +432,64 @@ public void testCharacterPositionsUrlPatternWithPDFAnnotations_URL_shouldReturnC assertThat(input.substring(url1.start, url1.end), is("https://sites.google. \ncom/view/shijuanchen/research/shift_cult")); } + @Test + public void testCharacterPositionsUrlPatternWithPDFAnnotations_DuplicatedMatchingPDFAnnotations_shouldReturnCorrectIntervalBasedOnText4() throws Exception { + final String input = "Google Earth Engine applications to visualize the \n" + + "datasets: https://github.com/shijuanchen/shift_cult \n" + + "Map products visualization: https://sites.google. \n" + + "com/view/shijuanchen/research/shift_cult \n"; + + List tokenisedInput = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(input); + LayoutToken lastTokenOfTheURL1 = tokenisedInput.get(28); + lastTokenOfTheURL1.setPage(10); + lastTokenOfTheURL1.setX(504.75295121951217); + lastTokenOfTheURL1.setY(626.353); + lastTokenOfTheURL1.setWidth(40.858048780487806); + lastTokenOfTheURL1.setHeight(9.3999); + + LayoutToken lastTokenOfTheURL2 = tokenisedInput.get(44); + lastTokenOfTheURL2.setPage(10); + lastTokenOfTheURL2.setX(526.9964666666667); + lastTokenOfTheURL2.setY(638.853); + lastTokenOfTheURL2.setWidth(22.0712); + lastTokenOfTheURL2.setHeight(9.3999); + + PDFAnnotation annotation1 = new PDFAnnotation(); + annotation1.setPageNumber(10); + List boundingBoxes = new ArrayList<>(); + boundingBoxes.add(BoundingBox.fromPointAndDimensions(10, 378.093, 625.354, 167.51799999999997, 10.599999999999909)); + annotation1.setBoundingBoxes(boundingBoxes); + annotation1.setDestination("https://github.com/shijuanchen/shift_cult"); + annotation1.setType(PDFAnnotation.Type.URI); + + PDFAnnotation annotation2 = new PDFAnnotation(); + annotation2.setPageNumber(10); + List boundingBoxes2 = new ArrayList<>(); + boundingBoxes2.add(BoundingBox.fromPointAndDimensions(10, 475.497, 637.854, 77.26, 10.60)); + annotation2.setBoundingBoxes(boundingBoxes2); + annotation2.setDestination("https://www.google.com"); + annotation2.setType(PDFAnnotation.Type.URI); + + PDFAnnotation annotation3 = new PDFAnnotation(); + annotation3.setPageNumber(10); + List boundingBoxes3 = new ArrayList<>(); + boundingBoxes3.add(BoundingBox.fromPointAndDimensions(10, 475.497, 637.854, 77.26, 10.60)); + annotation3.setBoundingBoxes(boundingBoxes3); + annotation3.setDestination("https://sites.google.com/view/shijuanchen/research/shift_cult"); + annotation3.setType(PDFAnnotation.Type.URI); + + List pdfAnnotations = List.of(annotation1, annotation2, annotation3); + + List offsetPositions = Lexicon.characterPositionsUrlPatternWithPdfAnnotations(tokenisedInput, pdfAnnotations); + + assertThat(offsetPositions, hasSize(2)); + OffsetPosition url0 = offsetPositions.get(0); + assertThat(input.substring(url0.start, url0.end), is("https://github.com/shijuanchen/shift_cult")); + OffsetPosition url1 = offsetPositions.get(1); + assertThat(input.substring(url1.start, url1.end), is("https://sites.google. 
\ncom/view/shijuanchen/research/shift_cult")); + } + + @Test public void testCharacterPositionsUrlPatternWithPDFAnnotations_URL_shouldReturnCorrectIntervalBasedOnText5() throws Exception { final String input = ", accessible through the University of Hawaii Sea Level Center with station ID of UHSLC ID 57 \n" + @@ -494,4 +552,235 @@ public void testGetTokenPosition() throws Exception { } + @Test + public void testCharacterPositionsUrlPattern_URLRegexMatchesTooLittle_shouldReturnCorrectInterval_1() throws Exception { + final String input = "We appreciate assistance from The Research Support Center, Research Center for Human Disease Modeling, \n" + + "and Kyushu University Graduate School of Medical Sciences. We thank Dr. Mitsuru Watanabe and Ms. Eriko \n" + + "Matsuo from the Department of Neurology, Kyushu University, for the technical assistance in the flow cytometric \n" + + "analysis. We thank Ms. Sachiko Koyama and Hideko Noguchi from the Department of Neuropathology, Kyushu \n" + + "University, for excellent technical assistance in the histological analysis. We thank Mr. Tetsuo Kishi from the \n" + + "Department of Medicine, Kyushu University School of Medicine for the immunohistochemical analysis. We \n" + + "thank J. Ludovic Croxford, PhD, from Edanz (https:// jp. edanz. com/ ac) for editing a draft of this manuscript."; + + List tokenisedInput = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(input); + + //These have to overlap with the regex output to make sure that the annotation is selected + LayoutToken lastTokenOfTheURL1 = tokenisedInput.get(219); + lastTokenOfTheURL1.setPage(15); + lastTokenOfTheURL1.setX(322.49060000000003); + lastTokenOfTheURL1.setY(454.586); + lastTokenOfTheURL1.setWidth(16.338); + lastTokenOfTheURL1.setHeight(9.099); + + LayoutToken lastTokenOfTheURL2 = tokenisedInput.get(220); + lastTokenOfTheURL2.setPage(15); + lastTokenOfTheURL2.setX(338.8286); + lastTokenOfTheURL2.setY(454.586); + lastTokenOfTheURL2.setWidth(3.2676); + lastTokenOfTheURL2.setHeight(9.099); + + LayoutToken lastTokenOfTheURL3 = tokenisedInput.get(221); + lastTokenOfTheURL3.setPage(15); + lastTokenOfTheURL3.setX(342.0962); + lastTokenOfTheURL3.setY(454.586); + lastTokenOfTheURL3.setWidth(3.2676); + lastTokenOfTheURL3.setHeight(9.099); + + LayoutToken lastTokenOfTheURL4 = tokenisedInput.get(222); + lastTokenOfTheURL4.setPage(15); + lastTokenOfTheURL4.setX(345.3638); + lastTokenOfTheURL4.setY(454.586); + lastTokenOfTheURL4.setWidth(3.2676); + lastTokenOfTheURL4.setHeight(9.099); + + LayoutToken lastTokenOfTheURL5 = tokenisedInput.get(224); + lastTokenOfTheURL5.setPage(15); + lastTokenOfTheURL5.setX(348.667); + lastTokenOfTheURL5.setY(454.586); + lastTokenOfTheURL5.setWidth(5.868599999999999); + lastTokenOfTheURL5.setHeight(9.099); + + LayoutToken lastTokenOfTheURL6 = tokenisedInput.get(225); + lastTokenOfTheURL6.setPage(15); + lastTokenOfTheURL6.setX(354.5356); + lastTokenOfTheURL6.setY(454.586); + lastTokenOfTheURL6.setWidth(2.9342999999999995); + lastTokenOfTheURL6.setHeight(9.099); + + LayoutToken lastTokenOfTheURL7 = tokenisedInput.get(227); + lastTokenOfTheURL7.setPage(15); + lastTokenOfTheURL7.setX(357.514); + lastTokenOfTheURL7.setY(454.586); + lastTokenOfTheURL7.setWidth(19.5645); + lastTokenOfTheURL7.setHeight(9.099); + + LayoutToken lastTokenOfTheURL10 = tokenisedInput.get(231); + lastTokenOfTheURL10.setPage(15); + lastTokenOfTheURL10.setX(395.106375); + lastTokenOfTheURL10.setY(454.586); + lastTokenOfTheURL10.setWidth(4.690125); + lastTokenOfTheURL10.setHeight(9.099); + + 
LayoutToken lastTokenOfTheURL11 = tokenisedInput.get(233); + lastTokenOfTheURL11.setPage(15); + lastTokenOfTheURL11.setX(399.842); + lastTokenOfTheURL11.setY(454.586); + lastTokenOfTheURL11.setWidth(7.295399999999999); + lastTokenOfTheURL11.setHeight(9.099); + + LayoutToken lastTokenOfTheURL12 = tokenisedInput.get(234); + lastTokenOfTheURL12.setPage(15); + lastTokenOfTheURL12.setX(407.13739999999996); + lastTokenOfTheURL12.setY(454.586); + lastTokenOfTheURL12.setWidth(3.6476999999999995); + lastTokenOfTheURL12.setHeight(9.099); + + PDFAnnotation annotation1 = new PDFAnnotation(); + annotation1.setPageNumber(15); + List boundingBoxes = new ArrayList<>(); + boundingBoxes.add(BoundingBox.fromPointAndDimensions(15, 322.37, 451.55, 85.305, 12.140999999999963)); + annotation1.setBoundingBoxes(boundingBoxes); + annotation1.setDestination("https://jp.edanz.com/ac"); + annotation1.setType(PDFAnnotation.Type.URI); + + List pdfAnnotations = List.of(annotation1); + + List offsetPositions = Lexicon.characterPositionsUrlPatternWithPdfAnnotations(tokenisedInput, pdfAnnotations); + + assertThat(offsetPositions, hasSize(1)); + OffsetPosition url0 = offsetPositions.get(0); + assertThat(input.substring(url0.start, url0.end), is("https:// jp. edanz. com/ ac")); + } + + @Test + public void testCharacterPositionsUrlPattern_URLRegexMatchesTooLittle_shouldReturnCorrectInterval_2() throws Exception { + /* + * This test only aims for the last link + */ + final String input = ", \n" + + "based on the sorted BAM files generated by using BWA-MEM (v.0.7.17; http:// \n" + + "biobwa.sourceforge.net/) and SAMtools (v1.546; http://www.htslib.org/). MetaBAT2 \n" + + "was applied to bin the assemblies with contig depth results under the default \n" + + "parameters (minimum contig length ≥ 1500 bp). 
CheckM v.1.0.3 (https://ecogenom \n" + + "ics.github.io/CheckM/) with the lineage_wf workflow was used to estimate the complete \n" + + "ness and contamination of MAGs "; + + List tokenisedInput = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(input); + + //These have to overlap with the regex output to make sure that the annotation is selected + LayoutToken lastTokenOfTheURL1 = tokenisedInput.get(132); + lastTokenOfTheURL1.setPage(5); + lastTokenOfTheURL1.setX(331.7820588235294); + lastTokenOfTheURL1.setY(467.682); + lastTokenOfTheURL1.setWidth(4.307294117647059); + lastTokenOfTheURL1.setHeight(10.818); + + LayoutToken lastTokenOfTheURL2 = tokenisedInput.get(133); + lastTokenOfTheURL2.setPage(5); + lastTokenOfTheURL2.setX(336.08935294117646); + lastTokenOfTheURL2.setY(467.682); + lastTokenOfTheURL2.setWidth(4.307294117647059); + lastTokenOfTheURL2.setHeight(10.818); + + LayoutToken lastTokenOfTheURL3 = tokenisedInput.get(134); + lastTokenOfTheURL3.setPage(5); + lastTokenOfTheURL3.setX(340.39664705882353); + lastTokenOfTheURL3.setY(467.682); + lastTokenOfTheURL3.setWidth(34.45835294117647); + lastTokenOfTheURL3.setHeight(10.818); + + LayoutToken lastTokenOfTheURL5 = tokenisedInput.get(137); + lastTokenOfTheURL5.setPage(5); + lastTokenOfTheURL5.setX(41.9999); + lastTokenOfTheURL5.setY(479.682); + lastTokenOfTheURL5.setWidth(11.487272727272726); + lastTokenOfTheURL5.setHeight(10.818); + + PDFAnnotation annotation1 = new PDFAnnotation(); + annotation1.setPageNumber(5); + List boundingBoxes = new ArrayList<>(); + boundingBoxes.add(BoundingBox.fromPointAndDimensions(5, 41.00, 468.50, 335.00, 23.00)); + boundingBoxes.add(BoundingBox.fromPointAndDimensions(5, 134.01, 454.50, 170.18, 24.00)); + boundingBoxes.add(BoundingBox.fromPointAndDimensions(5, 123.68, 481.50, 0.00, 9.00)); + annotation1.setBoundingBoxes(boundingBoxes); + annotation1.setDestination("https://ecogenomics.github.io/CheckM/"); + annotation1.setType(PDFAnnotation.Type.URI); + + List pdfAnnotations = List.of(annotation1); + + List offsetPositions = Lexicon.characterPositionsUrlPatternWithPdfAnnotations(tokenisedInput, pdfAnnotations); + + assertThat(offsetPositions, hasSize(3)); + OffsetPosition url2 = offsetPositions.get(2); + assertThat(input.substring(url2.start, url2.end), is("https://ecogenom \n" + + "ics.github.io/CheckM/")); + } + + @Test + public void testCharacterPositionsUrlPattern_URLContainsSpuriosBreklineHypen_shouldReturnCorrectInterval() throws Exception { + /* + * This test only aims for the last link + */ + final String input = "Details and code for using the IntOGen framework are available at \n" + + "https://intogen.readthedocs.io/en/latest/index.html. The specific \n" + + "code to perform this analysis is available in the Genomics England \n" + + "research environment (https://re-docs.genomicsengland.co.uk/ \n" + + "access/) under /re_gecip/shared_allGeCIPs/pancancer_drivers/code/. \n" + + "The link to becoming a member of the Genomics England research \n" + + "network and obtaining access can be found at https://www.genomic-\n" + + "sengland.co.uk/research/academic/join-gecip. The code to perform \n" + + "the canSAR chemogenomics analysis is available through Zenodo \n" + + "(https://doi.org/10.5281/zenodo.8329054) (ref. 
"; + + List tokenisedInput = GrobidAnalyzer.getInstance().tokenizeWithLayoutToken(input); + + //These have to overlap with the regex output to make sure that the annotation is selected + LayoutToken lastTokenOfTheURL0 = tokenisedInput.get(153); + lastTokenOfTheURL0.setPage(11); + lastTokenOfTheURL0.setX(523.39535); + lastTokenOfTheURL0.setY(436.559); + lastTokenOfTheURL0.setWidth(4.205850000000001); + lastTokenOfTheURL0.setHeight(8.217); + + LayoutToken lastTokenOfTheURL1 = tokenisedInput.get(154); + lastTokenOfTheURL1.setPage(11); + lastTokenOfTheURL1.setX(527.6012); + lastTokenOfTheURL1.setY(436.559); + lastTokenOfTheURL1.setWidth(29.44095); + lastTokenOfTheURL1.setHeight(8.217); + + LayoutToken lastTokenOfTheURL2 = tokenisedInput.get(155); + lastTokenOfTheURL2.setPage(11); + lastTokenOfTheURL2.setX(557.04215); + lastTokenOfTheURL2.setY(436.559); + lastTokenOfTheURL2.setWidth(8.217); + lastTokenOfTheURL2.setHeight(10.818); + + LayoutToken lastTokenOfTheURL3 = tokenisedInput.get(157); + lastTokenOfTheURL3.setPage(11); + lastTokenOfTheURL3.setX(306.141); + lastTokenOfTheURL3.setY(447.309); + lastTokenOfTheURL3.setWidth(31.902000000000005); + lastTokenOfTheURL3.setHeight(8.217); + + PDFAnnotation annotation1 = new PDFAnnotation(); + annotation1.setPageNumber(11); + List boundingBoxes = new ArrayList<>(); + boundingBoxes.add(BoundingBox.fromPointAndDimensions(11,477.14,434.60,84.12,10.18)); +// boundingBoxes.add(BoundingBox.fromPointAndDimensions(5, 134.01, 454.50, 170.18, 24.00)); +// boundingBoxes.add(BoundingBox.fromPointAndDimensions(5, 123.68, 481.50, 0.00, 9.00)); + annotation1.setBoundingBoxes(boundingBoxes); + annotation1.setDestination("https://www.genomicsengland.co.uk/research/academic/join-gecip"); + annotation1.setType(PDFAnnotation.Type.URI); + + List pdfAnnotations = List.of(annotation1); + + List offsetPositions = Lexicon.characterPositionsUrlPatternWithPdfAnnotations(tokenisedInput, pdfAnnotations); + + assertThat(offsetPositions, hasSize(4)); + OffsetPosition url2 = offsetPositions.get(2); + assertThat(input.substring(url2.start, url2.end), is("https://www.genomic-\n" + + "sengland.co.uk/research/academic/join-gecip")); + } + } diff --git a/grobid-trainer/resources/dataset/fulltext/corpus/tei/s41598-020-58065-9.training.fulltext.tei.xml b/grobid-trainer/resources/dataset/fulltext/corpus/tei/s41598-020-58065-9.training.fulltext.tei.xml index 59bdd0ce98..39251fe8a6 100644 --- a/grobid-trainer/resources/dataset/fulltext/corpus/tei/s41598-020-58065-9.training.fulltext.tei.xml +++ b/grobid-trainer/resources/dataset/fulltext/corpus/tei/s41598-020-58065-9.training.fulltext.tei.xml @@ -14,11 +14,11 @@

The thermodynamic parameters of the LaH 10 superconductor were calculated by means of Eliashberg equations on the imaginary axis 23 :

- π μ Δ = Ω − Ω − Ω + Δ Δ =− Z k T K [ ( ) ( )] , n n B m M M n m m m m m 2 2 + π μ Δ = Ω − Ω − Ω + Δ Δ =− Z k T K [ ( ) ( )] , () n n B m M M n m m m m m 2 2

and

- π = + Ω − Ω Ω + Δ . =− Z kT K Z 1 ( ) n B m M M n m m m m n m 2 2 + π = + Ω − Ω Ω + Δ . =− Z kT K Z 1 ( ) () n B m M M n m m m m n m 2 2

The symbols Δ = Δ Ω i ( ) n n and = Z Z i ( ) n n denote the order parameter and the wave function renormalization factor, respectively. The quantity Ω n represents the Matsubara frequency: π Ω = k T n ( 2 1) n B , where k B is the Boltzmann constant. The pairing kernel is defined by: λ Ω − Ω = Ω − Ω +Ω K( ) n m ( ) C n m C 2 2 2 , where λ denotes the elec-tron-phonon coupling constant. We determined the value of λ on the basis of experimental data 20,21 and the condition: Δ = = = [ ] 0 n T T 1 C . The fitting between the theory and the experimental results is presented in Fig. 1. We obtained λ a = 2.187 for p a = 150 GPa and λ b = 2.818 for p b = 190 GPa. The symbol Ω C represents the character-istic phonon frequency, its value being assumed as Ω C = 100 meV.
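The garbled formula tokens in this training file are deliberately left as extracted, since GROBID fulltext training data must mirror the PDF text stream (this hunk only inserts the empty "()" equation-label placeholders). For readability, a hedged LaTeX reconstruction of the two Eliashberg equations and the in-text definitions above, assuming the standard isotropic imaginary-axis form used in the LaH10 literature; the sharp Coulomb cutoff θ(Ω_c − |Ω_m|) is an assumption, as the token stream only shows μ:

```latex
% Hedged reconstruction; \Delta_n = \Delta(i\Omega_n), Z_n = Z(i\Omega_n)
\Delta_n Z_n = \pi k_B T \sum_{m=-M}^{M}
    \frac{K(\Omega_n-\Omega_m) - \mu^{\star}\,\theta(\Omega_c-|\Omega_m|)}
         {\sqrt{\Omega_m^{2}+\Delta_m^{2}}}\,\Delta_m ,
\qquad
Z_n = 1 + \frac{\pi k_B T}{\Omega_n} \sum_{m=-M}^{M}
    \frac{K(\Omega_n-\Omega_m)}{\sqrt{\Omega_m^{2}+\Delta_m^{2}}}\,\Omega_m ,
% with the pairing kernel and Matsubara frequencies as defined in the text:
K(\Omega_n-\Omega_m) = \frac{\lambda\,\Omega_C^{2}}{(\Omega_n-\Omega_m)^{2}+\Omega_C^{2}} ,
\qquad
\Omega_n = \pi k_B T\,(2n-1) .
```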

@@ -32,19 +32,19 @@
Figure 1. The dependence of the maximum value of the order parameter on the electron-phonon coupling constant. We consider two cases: = T 215 C a K (p a = 150 GPa) and = T 260 C b K (p b = 190 GPa).
- ρ π Δ = − Ω + Δ − Ω ×      Ω + Δ       = F k T Z Z (0) 2 ( ) , B n M n n n n S n N n n n 1 2 2 2 2 + ρ π Δ = − Ω + Δ − Ω ×      Ω + Δ       = F k T Z Z (0) 2 ( ) , () B n M n n n n S n N n n n 1 2 2 2 2

where ρ(0) denotes the value of electronic density of states at Fermi surface; Z n S and Z n N are the wave function normalization factors for the superconducting and the normal state, respectively. Note that ΔF is equal to zero exactly for T = T C . This fact results from the overt dependence of free energy on solutions of Eliashberg equations (Δ n and Z n ) that have been adjusted to the experimental value of critical temperature by appropriate selection of electron-phonon coupling constant (see Fig. 1). Thermodynamic critical field should be calculated from the formula:

- ρ π ρ = − Δ . H F (0) 8 [ / (0)] C + ρ π ρ = − Δ . H F (0) 8 [ / (0)] () C

The difference in the specific heat between the superconducting and the normal state (ΔC = C S − C N ) is given by:

- ρ ρ Δ = − Δ . C T k k T d F d k T ( ) (0) [ / (0)] ( ) B B B 2 2 + ρ ρ Δ = − Δ . C T k k T d F d k T ( ) (0) [ / (0)] ( ) () B B B 2 2

The most convenient way of estimation the specific heat for the normal state is using the expression:

- ρ γ = . C T k k T ( ) (0) N B B + ρ γ = . C T k k T ( ) (0) () N B B
Figure 2. The dependence of the order parameter on temperature. The insets present the influence of temperature on the value of effective electron mass to the band electron mass ratio. Blue or red disks represent numerical results. Black curves were obtained from the analytical formulae: Δ = Δ Γ T T T T ( ) ( ) 1 ( / ) C 0 and = + Γ m m Z T Z T T T ZT / [ ( ) ( )]( / ) ( ) e e C C 0 0 , where λ = + Z T ( ) 1 C , Γ a = 3.5 and Γ b = 3.4. The predictions of the BCS theory we marked with grey circles.
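The free-energy, critical-field and specific-heat formulas above are likewise raw extracted tokens. A hedged reconstruction, assuming the standard Bardeen-Stephen free-energy difference and the usual thermodynamic relations; γ denotes the Sommerfeld factor, whose value is not recoverable from the token stream:

```latex
% Free energy difference between the superconducting and normal state
\frac{\Delta F}{\rho(0)} = -2\pi k_B T \sum_{n=1}^{M}
    \left(\sqrt{\Omega_n^{2}+\Delta_n^{2}}-|\Omega_n|\right)
    \left(Z_n^{S}-Z_n^{N}\,\frac{|\Omega_n|}{\sqrt{\Omega_n^{2}+\Delta_n^{2}}}\right),
% Thermodynamic critical field
\frac{H_C}{\sqrt{\rho(0)}} = \sqrt{8\pi\left[-\Delta F/\rho(0)\right]} ,
% Specific heat jump (from \Delta C = -T\, d^{2}\Delta F/dT^{2}) and
% normal-state specific heat
\frac{\Delta C(T)}{k_B\,\rho(0)} = -k_B T\,\frac{d^{2}\left[\Delta F/\rho(0)\right]}{d(k_B T)^{2}} ,
\qquad
\frac{C^{N}(T)}{k_B\,\rho(0)} = \gamma\,k_B T .
```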
@@ -59,28 +59,28 @@

Nevertheless, a sensible qualitative analysis can be made with respect to the influence of the atomic mass of the X element on a value of the critical temperature (since the mass of the X element determines Ω max ). In this regard, let us refer to the theoretical results obtained within the Eliashberg formalism for H 2 S and H 3 S superconduc-tors 5,6 . They prove that contributions to the Eliashberg function (α Ω F( ) 2 ) coming from sulphur and from hydro-gen are separated due to a huge difference between atomic masses of these two elements. To be precise, the electron-phonon interaction derived from sulphur is crucial in the frequency range from 0 meV to Ω max S equal to about 70 meV, while the contribution derived from hydrogen (Ω = 220 max H meV) is significant above ~100 meV. It is noteworthy that we come upon a similar situation in the case of the LaH 10 compound 30 . Therefore the follow-ing factorization of the Eliashberg function for the LaXH compound can be assumed:

α λ θ λ θ λ θ Ω =         − Ω +          − Ω +          − Ω F( ) ( ) ( ) ( ) , - 2 L a max La 2 max La X max X 2 max X H max H 2 max H + () 2 L a max La 2 max La X max X 2 max X H max H 2 max H

where λ La , λ X , and λ H are the contributions to the electron-phonon coupling constant derived from both metals (La, X) and hydrogen, respectively. Similarly, the symbols Ω max La , Ω max X , and Ω max H represent the respective maxi-mum phonon frequencies. The value of the critical temperature can be assessed from the generalised formula of the BCS theory 7 :

- λ λ λμ = . − . + − + . k T f f 1 27 exp 1 14(1 ) (1 0 163 ) , B C 1 2 ln + λ λ λμ = . − . + − + . k T f f 1 27 exp 1 14(1 ) (1 0 163 ) , () B C 1 2 ln

while the symbols appearing in Eq. (8) are defined in Table 1.

Let us calculate explicitly the relevant quantities:

- λ λ λ λ = + + , + λ λ λ λ = + + , ()
La X H Quantity λ = α +∞ Ω Ω d 2 F 0 2 ( ) ( ) , Ω = λ α +∞ d exp l n( ) F ln 2 0 2 ( ) , α Ω = Ω Ω λ +∞ d F( ) 2 2 0 2 , = + λ Λ ( ) f 1 1 1 3 2 1 3 , = + λ λ           + Λ f 1 2 2 ln 1 2 2 2 2 , Λ 1 = 2.4 − 0.14μ ' , μ Λ = . + Ω Ω (0 1 9 )( / ) 2 2 ln . Table 1. The quantities: λ (electron-phonon coupling constant), Ω ln (logarithmic phonon frequency), Ω 2 (second moment of the normalized weight function), f 1 (strong-coupling correction function), and f 2 (shape correction function) μ.
λ λ λ λ λ λ λ λ λ λ λ λ Ω = + +   Ω    × + +   Ω    × + +   Ω    exp l n( ) 1 2 exp l n( ) 1 2 exp l n( ) 1 2 , - + () ln La La X H max La X La X H max X H La X H max H

and

- λ λ λ λ λ λ λ λ λ λ λ λ Ω = + + + + + + + + . ( ) 2 ( ) 2 ( ) 2 2 La La X H max La 2 X La X H max X 2 H La X H max H 2 + λ λ λ λ λ λ λ λ λ λ λ λ Ω = + + + + + + + + . ( ) 2 ( ) 2 ( ) 2 () 2 La La X H max La 2 X La X H max X 2 H La X H max H 2

We are going to consider the case Ω < Ω < ~40 meV 100 meV max La max X . It means that we are interested in such an X element, the contribution of which to the Eliashberg function fills the gap between contributions com-ing from lanthanum and hydrogen. It can be assumed that 0 < λ X < 1, while keeping in mind that λ La = 0.68 31 . Additionally, the previous calculations discussed in the work allow to write that λ La + λ H is equal to λ a = 2.187 for p a = 150 GPa or to λ b = 2.818 for p b = 190 GPa. The quantity  μ occurring in the Eq. (8) serves now as the fitting parameter. One should remember that the formula for the critical temperature given by the Eq. (8) was derived with the use of significant simplifying assumptions (the value of the cut-off frequency is neglected, as well as the retardation effects modeled by the Matsubara frequency). Therefore the value of the Coulomb pseudopo-tential determined from the full Eliashberg equations usually differs from the value of  μ calculated analytically. The experimental data for the LaH 10 superconductor can be reproduced using Eq. (8) and assuming that μ = . 0 170 a and μ = . 0 276 b .
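For the LaXH discussion above, the factorized Eliashberg function, the generalised-BCS critical-temperature formula (Eq. (8)) and the composite quantities λ, Ω_ln and Ω₂ can be reconstructed, hedged, from the token stream and the Table 1 definitions; j runs over {La, X, H}, and the constants 1.27, 1.14 and 0.163 are read off the garbled text:

```latex
% Factorized Eliashberg function (\theta is the Heaviside step function)
\alpha^{2}F(\Omega) = \sum_{j \in \{\mathrm{La},\,X,\,\mathrm{H}\}} \lambda_j
    \left(\frac{\Omega}{\Omega_{\max}^{j}}\right)^{2}
    \theta\!\left(\Omega_{\max}^{j}-\Omega\right),
% Generalised BCS formula for the critical temperature (Eq. (8))
k_B T_C = f_1 f_2\,\frac{\Omega_{\ln}}{1.27}
    \exp\!\left[\frac{-1.14\,(1+\lambda)}{\lambda-\mu^{\star}(1+0.163\,\lambda)}\right],
% Composite coupling constant and frequency moments
\lambda = \lambda_{\mathrm{La}} + \lambda_{X} + \lambda_{\mathrm{H}} ,
\qquad
\Omega_{\ln} = \exp\!\left[\sum_{j}\frac{\lambda_j}{\lambda}
    \left(\ln\Omega_{\max}^{j}-\tfrac{1}{2}\right)\right],
\qquad
\Omega_{2}^{2} = \sum_{j}\frac{\lambda_j}{\lambda}\,
    \frac{\left(\Omega_{\max}^{j}\right)^{2}}{2} .
```

These forms are mutually consistent: inserting the factorized α²F into the Table 1 integrals for λ, Ω_ln and Ω₂ reproduces exactly the three composite expressions.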

diff --git a/grobid-trainer/resources/dataset/header/corpus/tei/10.1371_journal.pone.0210163.training.header.tei.xml b/grobid-trainer/resources/dataset/header/corpus/tei/10.1371_journal.pone.0210163.training.header.tei.xml index 7da5707c04..deab9cc6f4 100644 --- a/grobid-trainer/resources/dataset/header/corpus/tei/10.1371_journal.pone.0210163.training.header.tei.xml +++ b/grobid-trainer/resources/dataset/header/corpus/tei/10.1371_journal.pone.0210163.training.header.tei.xml @@ -65,7 +65,7 @@ © 2019 Rake, Haeussler. This is an open access article distributed under the terms of the Creative Commons Attribution License, which permits unrestricted use, distribution, and reproduction in any medium, provided the original author and source are credited. Data Availability Statement: -
Data on clinical trials conducted in India obtained from ClinicalTrials.gov is made available in the corresponding Supporting Information file. Other authors can also access this information through ClinicalTrials.gov. We obtained publication data from the Scopus database which is a proprietary database (www.scopus.com). Researchers interested in replicating our study can access data on trial-related publications following the search procedure described in the paper. Researchers do not need special privileges to access the Scopus database, however, a subscription may be required. The authors did not have special access privileges to the data.
+ Data on clinical trials conducted in India obtained from ClinicalTrials.gov is made available in the corresponding Supporting Information file. Other authors can also access this information through ClinicalTrials.gov. We obtained publication data from the Scopus database which is a proprietary database (www.scopus.com). Researchers interested in replicating our study can access data on trial-related publications following the search procedure described in the paper. Researchers do not need special privileges to access the Scopus database, however, a subscription may be required. The authors did not have special access privileges to the data. Funding: The authors received no specific funding for this work.
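To make the new disambiguation step in Lexicon.matchPdfAnnotationsBasedOnCoordinatesDestinationOrLastTokens easier to review, here is a minimal, self-contained sketch of the suffix-growing idea on plain strings. It is an illustration under simplifying assumptions, not the patch itself: String destinations stand in for the coordinate-filtered PDFAnnotation candidates, LayoutToken is reduced to String, and an empty narrowing result stops the loop rather than overwriting the candidate list.

```java
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;

public class UrlAnnotationDisambiguationSketch {

    /**
     * Among candidate destination URLs that all cover the last token of a
     * recognized URL, grow the matched token suffix right-to-left until at
     * most one destination still contains it (cf. the while-loop added in
     * matchPdfAnnotationsBasedOnCoordinatesDestinationOrLastTokens).
     */
    static String disambiguate(List<String> candidateDestinations, List<String> urlTokens) {
        List<String> candidates = new ArrayList<>(candidateDestinations);

        int index = urlTokens.size() - 1;
        // As in the patch: if the last token is one of . : _ - /, start one token earlier.
        if (urlTokens.size() > 1 && urlTokens.get(index).matches("[.:_\\-/]")) {
            index--;
        }

        while (index > 0 && candidates.size() > 1) {
            // The suffix always extends to the end of the token list.
            String suffix = String.join("", urlTokens.subList(index - 1, urlTokens.size()));
            List<String> narrowed = candidates.stream()
                    .filter(destination -> destination.contains(suffix))
                    .collect(Collectors.toList());
            if (narrowed.isEmpty()) {
                break; // simplification: keep the last non-empty candidate set
            }
            candidates = narrowed;
            index--;
        }
        return candidates.isEmpty() ? null : candidates.get(0);
    }

    public static void main(String[] args) {
        // Both destinations end in "shift_cult", so the last token alone is
        // ambiguous; growing the suffix to "research/shift_cult" keeps only
        // the second destination (mirrors the duplicated-annotation test).
        List<String> destinations = List.of(
                "https://github.com/shijuanchen/shift_cult",
                "https://sites.google.com/view/shijuanchen/research/shift_cult");
        List<String> tokens = List.of(
                "com", "/", "view", "/", "shijuanchen", "/", "research", "/", "shift_cult");
        System.out.println(disambiguate(destinations, tokens));
        // -> https://sites.google.com/view/shijuanchen/research/shift_cult
    }
}
```

In the actual patch the candidates are first restricted by PDFAnnotation.cover(lastToken) and by destination.contains(urlString); the suffix growth only runs when those filters still leave more than one URI annotation.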