diff --git a/.gitattributes b/.gitattributes
index 70d7a72..3ade302 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,5 +1,8 @@
-# Ref: https://stackoverflow.com/questions/19052834/is-it-possible-to-exclude-files-from-git-language-statistics
-data/ZhConversion.php linguist-vendored
+# Exclude external ruleset files from GitHub PL stats
+# ref: https://stackoverflow.com/questions/19052834/is-it-possible-to-exclude-files-from-git-language-statistics
+# And prevent auto CRLF conversion to avoid checksum mismatch
+data/ZhConversion.php linguist-vendored binary
+data/*.txt linguist-vendored binary
 data/cgroups/*.json linguist-vendored
 web/public/cgroups.json linguist-vendored
 benches/*.txt linguist-vendored
diff --git a/build.rs b/build.rs
index 131014c..f9fa6cb 100644
--- a/build.rs
+++ b/build.rs
@@ -353,7 +353,8 @@ fn read_and_validate_file(path: &str, sha256sum: &[u8; 32]) -> String {
     assert_eq!(
         &sha256(&content),
         sha256sum,
-        "Validating the checksum of zhconv"
+        "Validating the checksum of {}",
+        path.display()
     );
     content
 }
diff --git a/src/lib.rs b/src/lib.rs
index 329900d..2b5214c 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -3,7 +3,7 @@
 //! with the leftmost-longest matching strategy and linear time complexity with respect to the
 //! length of input text and conversion rules. It ships with a bunch of conversion tables,
 //! extracted from [zhConversion.php](https://phabricator.wikimedia.org/source/mediawiki/browse/master/includes/languages/data/ZhConversion.php)
-//! which is maintained and used by MediaWiki and Chinese Wikipedia.
+//! (maintained by MediaWiki and Chinese Wikipedia) and [OpenCC](https://github.com/BYVoid/OpenCC/tree/master/data/dictionary).
 //!
 //! While built-in datasets work well for general case, the converter is never meant to be 100%
 //! accurate, especially for professional text. In Chinese Wikipedia, it is pretty common for
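
Two reviewer notes on the build.rs hunk above. First, the `binary` attribute added in .gitattributes is what keeps this checksum check working: without it, git's auto CRLF conversion could rewrite the data files on checkout, so the SHA-256 computed at build time would no longer match the pinned digest. Second, the hunk calls `path.display()`, which is a `Path` method, while the hunk's context line still shows the old `path: &str` signature; presumably the signature is changed to a path type elsewhere in the same commit. A minimal, self-contained sketch of the resulting pattern follows; the `sha2`-backed helper and the `&Path` signature are assumptions for illustration, not taken from the patch:

    use sha2::{Digest, Sha256};
    use std::path::Path;

    // Hypothetical stand-in for the crate's own sha256 helper,
    // assumed here to be backed by the `sha2` crate.
    fn sha256(data: &[u8]) -> [u8; 32] {
        Sha256::digest(data).into()
    }

    // The pattern the hunk improves: on a checksum mismatch, the panic
    // message names the offending file instead of a hardcoded "zhconv".
    fn read_and_validate_file(path: &Path, sha256sum: &[u8; 32]) -> String {
        let content = std::fs::read_to_string(path).expect("failed to read data file");
        assert_eq!(
            &sha256(content.as_bytes()),
            sha256sum,
            "Validating the checksum of {}",
            path.display()
        );
        content
    }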