diff --git a/backend/danswer/connectors/web/connector.py b/backend/danswer/connectors/web/connector.py
index 6114f0a867a..c5fb6db26d4 100644
--- a/backend/danswer/connectors/web/connector.py
+++ b/backend/danswer/connectors/web/connector.py
@@ -1,4 +1,6 @@
 import io
+import ipaddress
+import socket
 from enum import Enum
 from typing import Any
 from typing import cast
@@ -27,7 +29,6 @@ from danswer.connectors.models import Section
 from danswer.utils.logger import setup_logger
 
-
 logger = setup_logger()
@@ -43,16 +44,33 @@ class WEB_CONNECTOR_VALID_SETTINGS(str, Enum):
 
 
 def protected_url_check(url: str) -> None:
+    """Couple considerations:
+    - DNS mapping changes over time so we don't want to cache the results
+    - Fetching this is assumed to be relatively fast compared to other bottlenecks like reading
+      the page or embedding the contents
+    - To be extra safe, all IPs associated with the URL must be global
+    """
     parse = urlparse(url)
-    if parse.scheme == "file":
-        raise ValueError("Not permitted to read local files via Web Connector.")
-    if (
-        parse.scheme == "localhost"
-        or parse.scheme == "127.0.0.1"
-        or parse.hostname == "localhost"
-        or parse.hostname == "127.0.0.1"
-    ):
-        raise ValueError("Not permitted to read localhost urls.")
+    if parse.scheme != "http" and parse.scheme != "https":
+        raise ValueError("URL must be of scheme https?://")
+
+    if not parse.hostname:
+        raise ValueError("URL must include a hostname")
+
+    try:
+        # This may give a large list of IP addresses for domains with extensive DNS configurations
+        # such as large distributed systems of CDNs
+        info = socket.getaddrinfo(parse.hostname, None)
+    except socket.gaierror as e:
+        raise ConnectionError(f"DNS resolution failed for {parse.hostname}: {e}")
+
+    for address in info:
+        ip = address[4][0]
+        if not ipaddress.ip_address(ip).is_global:
+            raise ValueError(
+                f"Non-global IP address detected: {ip}, skipping page {url}. "
+                f"The Web Connector is not allowed to read loopback, link-local, or private ranges"
+            )
 
 
 def check_internet_connection(url: str) -> None:
@@ -194,6 +212,10 @@ def load_from_state(self) -> GenerateDocumentsOutput:
         base_url = to_visit[0]  # For the recursive case
         doc_batch: list[Document] = []
 
+        # Needed to report error
+        at_least_one_doc = False
+        last_error = None
+
         playwright, context = start_playwright()
         restart_playwright = False
         while to_visit:
@@ -202,7 +224,12 @@
                 continue
             visited_links.add(current_url)
 
-            protected_url_check(current_url)
+            try:
+                protected_url_check(current_url)
+            except Exception as e:
+                last_error = f"Invalid URL {current_url} due to {e}"
+                logger.warning(last_error)
+                continue
 
             logger.info(f"Visiting {current_url}")
@@ -251,9 +278,8 @@
                         to_visit.append(link)
 
             if page_response and str(page_response.status)[0] in ("4", "5"):
-                logger.info(
-                    f"Skipped indexing {current_url} due to HTTP {page_response.status} response"
-                )
+                last_error = f"Skipped indexing {current_url} due to HTTP {page_response.status} response"
+                logger.info(last_error)
                 continue
 
             parsed_html = web_html_cleanup(soup, self.mintlify_cleanup)
@@ -272,7 +298,8 @@
                 page.close()
             except Exception as e:
-                logger.error(f"Failed to fetch '{current_url}': {e}")
+                last_error = f"Failed to fetch '{current_url}': {e}"
+                logger.error(last_error)
                 playwright.stop()
                 restart_playwright = True
                 continue
@@ -280,13 +307,20 @@
             if len(doc_batch) >= self.batch_size:
                 playwright.stop()
                 restart_playwright = True
+                at_least_one_doc = True
                 yield doc_batch
                 doc_batch = []
 
         if doc_batch:
             playwright.stop()
+            at_least_one_doc = True
            yield doc_batch
 
+        if not at_least_one_doc:
+            if last_error:
+                raise RuntimeError(last_error)
+            raise RuntimeError("No valid pages found.")
+
 
 if __name__ == "__main__":
     connector = WebConnector("https://docs.danswer.dev/")