pydata · chaburkland · Sep 24, 2025 · Sep 24, 2025 · Sep 24, 2025 · Sep 24, 2025
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
@@ -1,5 +1,5 @@
 - [ ] closes #xxxx
 - [ ] tests added / passed
-- [ ] passes `git diff upstream/master -u -- "*.py" | flake8 --diff`
+- [ ] passes `flake8 $(git diff --name-only origin/main -- '*.py')`
 - [ ] passes `black --check pandas_datareader`
 - [ ] added entry to docs/source/whatsnew/vLATEST.txt
diff --git a/.gitignore b/.gitignore
@@ -12,4 +12,6 @@ docs/build
 *~
 env/
 .pytest_cache/
-.vscode/
+.vscode/
+*.diff
+pandas_datareader/_version.py
diff --git a/README.md b/README.md
@@ -72,5 +72,5 @@ or
 ``` shell
 git clone https://github.com/pydata/pandas-datareader.git
 cd pandas-datareader
-python setup.py install
+python -m pip install -e .
 ```
diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -67,7 +67,7 @@ or
 
    git clone https://github.com/pydata/pandas-datareader.git
    cd pandas-datareader
-   python setup.py install
+   python -m pip install -e .
 
 `Development documentation <https://pydata.github.io/pandas-datareader/devel/>`__
 is available for the latest changes in master.

diff --git a/docs/source/whatsnew/vLATEST.txt b/docs/source/whatsnew/vLATEST.txt
@@ -0,0 +1,4 @@
+Bug Fixes
+~~~~~~~~~
+
+- Fixed the FamaFrench reader to handle the updated data file format (lone carriage-return line endings and daily ``YYYYMMDD`` dates)
diff --git a/pandas_datareader/famafrench.py b/pandas_datareader/famafrench.py
@@ -101,6 +101,7 @@ def _read_one_data(self, url, params):
 
         doc_chunks, tables = [], []
         data = self._read_zipfile(url)
+        data = re.sub(r"\r(?!\n)", "\r\n", data)  # turn lone CR into CRLF
 
         for chunk in data.split(2 * "\r\n"):
             if len(chunk) < 800:
@@ -114,7 +115,11 @@ def _read_one_data(self, url, params):
             start = 0 if not match else match.start()
 
             df = read_csv(StringIO("Date" + src[start:]), **params)
-            if df.index.min() > 190000:
+            if df.index.min() > 19000000:
+                df.index = to_datetime(df.index.astype(str), format="%Y%m%d").to_period(
+                    freq="D"
+                )
+            elif df.index.min() > 190000:
                 df.index = to_datetime(df.index.astype(str), format="%Y%m").to_period(
                     freq="M"
                 )

diff --git a/pandas_datareader/tests/test_famafrench.py b/pandas_datareader/tests/test_famafrench.py
@@ -10,7 +10,7 @@
 
 
 class TestFamaFrench:
-    def test_get_data(self):
+    def test_get_data_sample(self):
         keys = [
             "F-F_Research_Data_Factors",
             "F-F_ST_Reversal_Factor",
@@ -50,50 +50,50 @@ def test_f_f_research(self):
         exp = pd.DataFrame(
             {
                 "Mkt-RF": [
-                    -3.36,
-                    3.4,
-                    6.31,
-                    2.0,
-                    -7.89,
-                    -5.57,
-                    6.93,
-                    -4.77,
-                    9.54,
-                    3.88,
-                    0.6,
+                    -3.35,
+                    3.39,
+                    6.30,
+                    1.99,
+                    -7.90,
+                    -5.56,
+                    6.92,
+                    -4.78,
+                    9.55,
+                    3.87,
+                    0.59,
                     6.82,
                 ],
                 "SMB": [
-                    0.4,
-                    1.19,
-                    1.48,
-                    4.87,
-                    0.09,
-                    -1.81,
-                    0.2,
-                    -3.0,
-                    3.96,
-                    1.13,
-                    3.76,
-                    0.73,
+                    0.43,
+                    1.18,
+                    1.46,
+                    4.84,
+                    0.13,
+                    -1.79,
+                    0.22,
+                    -3.01,
+                    3.82,
+                    1.08,
+                    3.67,
+                    0.72,
                 ],
                 "HML": [
-                    0.43,
-                    3.22,
-                    2.21,
-                    2.89,
-                    -2.44,
-                    -4.7,
-                    -0.31,
-                    -1.9,
-                    -3.16,
-                    -2.42,
-                    -0.96,
-                    3.69,
+                    0.33,
+                    3.18,
+                    2.19,
+                    2.96,
+                    -2.48,
+                    -4.73,
+                    -0.50,
+                    -1.73,
+                    -3.02,
+                    -2.46,
+                    -0.90,
+                    3.56,
                 ],
                 "RF": [
-                    0.0,
-                    0.0,
+                    0.00,
+                    0.00,
                     0.01,
                     0.01,
                     0.01,
@@ -192,3 +192,9 @@ def test_prior_2_12_breakpoints(self):
 
         exp_index = pd.period_range("2010-01-01", "2010-12-01", freq="M", name="Date")
         tm.assert_index_equal(results[0].index, exp_index)
+
+    def test_all_datasets(self) -> None:
+        for dataset in get_available_datasets():
+            data = web.DataReader(dataset, "famafrench")
+
+            assert tuple(data) == (*range(len(data) - 1), "DESCR")