
Support for md ref links, others #6

Merged · 7 commits · Jul 1, 2021
Changes from 4 commits
43 changes: 28 additions & 15 deletions src/scrapeLinks.test.js
@@ -1,47 +1,60 @@
import scrapeLinks from "./scrapeLinks";

const testMarkdownString = `
This is a Markdown example with [a link to google](https://www.google.com) and [one with a subdirectory](https://www.google.com/nested/page.html)
const markdownString = `
Markdown example with a [link to Google](https://www.google.com), one [with a URL path](https://www.google.com/nested/page.html), and others:

and [another to reddit](www.reddit.com) and [a third to Twitter](facebook.com)
- One [to reddit](www.reddit.com)
- A fourth [to Facebook](facebook.com) (incomplete URLs)
- Finally a few [ref] [links][links] [here][link-here]

as well as some blank lines
[ref]: https://www.ref.com
[links]:
www.links.in/newline
[link-here]:
/just/a/path
Comment on lines +11 to +15
@jorgeorpinel (Contributor, Author) · Jun 30, 2021:

Test this...


There's also some blank lines, misc. text, and <span>HTML</span> code.
`;

const plaintextString = `
This string is plaintext, with links like https://www.google.com and https://www.google.com/nested/page.html

I can scrape "https://reddit.com/r/subreddit" and (https://facebook.com) as well!
I can scrape "https://reddit.com/r/subreddit" and (https://facebook.com) as well! The new regex can pull www.youtube.com too!?

The new regex can pull www.youtube.com too!? unfortunately, gmail.com is just too vague.
TODO: Unfortunately, gmail.com is just too vague.
TODO: Ending in a period won't work well either, e.g. www.something.com.
`;

const plaintextTestResult = [
const markdownTestResult = [
"https://www.google.com",
"https://www.google.com/nested/page.html",
"https://reddit.com/r/subreddit",
"https://facebook.com",
"www.youtube.com",
"www.reddit.com",
"facebook.com",
"https://www.ref.com",
"www.links.in/newline",
"/just/a/path",
Comment on lines +34 to +36
@jorgeorpinel (Contributor, Author) · Jun 30, 2021:

Vs. this

@jorgeorpinel (Contributor, Author) · Jun 30, 2021:

Hmmm BTW jest is failing with

    - Expected  - 2
    + Received  + 0
...
    -   "www.links.in/newline",
    -   "/just/a/path",

But if I remove these 2 lines then it fails with

    - Expected  - 0
    + Received  + 2
...
    +   "www.links.in/newline",
    +   "/just/a/path",

🤔 Yelp

A Contributor replied:

I'm pretty sure you're looking at two different tests, one that splits the string into newlines and one that doesn't.

I just removed the newline-split one and changed the inner workings so we don't get string arrays as content (they're joined into a single string before being returned). That should solve this weirdness and allow us to pick up links that span multiple lines like this.
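A minimal sketch of the join-before-scrape idea (hypothetical standalone code; the repo's own `regexMap` helper is internal, so this uses `String.prototype.matchAll` directly with the ref-link pattern from this PR's diff):

```javascript
// A ref link definition whose URL wraps onto the next line, as the
// line-splitting parser would previously see it: one string per line.
const lines = ["[links]:", "www.links.in/newline"];
const pattern = /\[.*\]:\s*\n?\s*(.*)/gm;

// Scraping line-by-line misses the URL: the pattern can't see past the break,
// so the "[links]:" line yields an empty capture and the URL line matches nothing.
const perLine = lines.flatMap((l) => [...l.matchAll(pattern)].map((m) => m[1]));
// perLine is [""]: the URL is lost

// Joining the lines first lets the whitespace part of the pattern bridge them.
const joined = [...lines.join("\n").matchAll(pattern)].map((m) => m[1]);
// joined is ["www.links.in/newline"]
```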

@jorgeorpinel (Contributor, Author) · Jun 30, 2021:

Right, I thought it had something to do with the newline, even though I put \n in my regex. I thought maybe it was separately matching those links as plain links, which is why I added other plain links in the sample (#6 (review) below).

TBH I'm not 100% sure what the changes to the other files do, but the tests are passing, so thanks! Do you approve this PR now? 🙂

A Contributor replied:

> TBH I'm not 100% sure what the changes to the other files do

All just internal stuff. Basically, this project has two ways of gathering links: from a file and from a GH diff. The GH diff method used to return an array of strings, since I had assumed no links span multiple lines, but that's clearly not the case; now that parser joins the array into a single string before returning it, similarly to the filesystem-based parser.

A Contributor replied:

> Do you approve this PR now?

Sorry for the delay! I'm trying this PR out manually by running it on the CLI, and noticed some things:

  1. dvc.org's prettier rules are butchering these ref links, forcing them onto the same line
  2. when forced with --no-verify, these links aren't picked up

I tested it manually by running yarn build, going to a local dvc.org directory, and adding the MD test string to a random MD file. The links were found when prettier forced them onto the same line, but multi-line ones weren't found even after all the changes.

@rogermparent (Contributor) · Jul 1, 2021:

Just found a change that sort of fixes it (removing + from the start of the git diff lines), but for some reason /just/a/path is still not found. I forgot: local links aren't checked locally! I think after this small patch we should be good to go.
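That fix can be illustrated with a small standalone sketch (hypothetical code; the variable names are made up, and only the ref-link regex comes from this PR's diff):

```javascript
// Two changed lines as they come out of a git diff, each prefixed with "+".
const diffLines = ["+[links]:", "+www.links.in/newline"];
const pattern = /\[.*\]:\s*\n?\s*(.*)/gm;

// Joined as-is, the "+" marker ends up inside the captured URL.
const raw = [...diffLines.join("\n").matchAll(pattern)].map((m) => m[1]);
// raw[0] is "+www.links.in/newline": not a valid link

// Stripping the leading "+" from each diff line first yields the clean URL.
const stripped = diffLines.map((l) => l.replace(/^\+/, ""));
const cleaned = [...stripped.join("\n").matchAll(pattern)].map((m) => m[1]);
// cleaned[0] is "www.links.in/newline"
```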

A Contributor replied:

Patch is in! I can deploy tomorrow when I'm a bit less tired; I don't want to rush and mess it up. However, merging in the source repo won't break anything in prod, so I can approve/merge. Thanks @jorgeorpinel!

@jorgeorpinel (Contributor, Author):

Hey, thank you so much @rogermparent! I see you did some more tweaks to address some of the things you mentioned. Let's see what it does on dvc.org once deployed 😬

];

const markdownTestResult = [
const plaintextTestResult = [
"https://www.google.com",
"https://www.google.com/nested/page.html",
"www.reddit.com",
"facebook.com",
"https://reddit.com/r/subreddit",
"https://facebook.com",
"www.youtube.com",
"www.something.com.",
];

test("It scrapes from the markdown test string", () => {
expect(
scrapeLinks({
filePath: "test.md",
content: testMarkdownString,
content: markdownString,
})
).toEqual(markdownTestResult);
});

test("It scrapes from the markdown test split by newlines", () => {
const splitTest = testMarkdownString.split("\n");
const splitTest = markdownString.split("\n");
expect(
scrapeLinks({
filePath: "test.md",
7 changes: 6 additions & 1 deletion src/scrapeLinks.ts
@@ -27,8 +27,13 @@ const scrapeFromString: (filePath: string, content: string) => string[] = (
/\[.*?\]\((?:<((?:\(.*?\)|.)*?)>|((?:\(.*?\)|.)*?))(?: ["'].*?["'])?\)/gm,
(x) => x[2] || x[1]
);
const mdRefLinks = regexMap(
content,
/\[.*\]:\s*\n?\s*(.*)/gm,
(x) => x[2] || x[1]
);
const hrefLinks = regexMap(content, /href="(.*?)"/gm);
jorgeorpinel marked this conversation as resolved.
const links = mdLinks.concat(hrefLinks);
const links = mdLinks.concat(mdRefLinks).concat(hrefLinks);
return links
? links
.filter(Boolean)
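As a standalone check of the new ref-link pattern (hypothetical snippet; `regexMap` is a repo-internal helper, so this uses `String.prototype.matchAll` directly):

```javascript
// The three ref-link definitions from the markdown test string above.
const content = [
  "[ref]: https://www.ref.com",
  "[links]:",
  "www.links.in/newline",
  "[link-here]:",
  "/just/a/path",
].join("\n");

// Same pattern as in the diff: the \s*\n?\s* part lets a definition's URL
// sit either on the same line as "[label]:" or on the line after it.
const refLinks = [...content.matchAll(/\[.*\]:\s*\n?\s*(.*)/gm)].map(
  (m) => m[1]
);
// refLinks is ["https://www.ref.com", "www.links.in/newline", "/just/a/path"]
```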