diff --git a/src/unicode.h b/src/unicode.h
index 2f539edf73..b70b7e012d 100644
--- a/src/unicode.h
+++ b/src/unicode.h
@@ -235,6 +235,51 @@ extern void truncate_utf8(UTF8 *string, int len);
  * sequences (plus one and not counting ASCII), so it can be used as a
  * quality measure. A low number might be a false positive, a high
  * number most probably isn't.
+ *
+ *
+ * Related info about UTF-8
+ *
+ * Valid UTF-8 sequences of bytes (1-4 bytes long):
+ *
+ * 00..7F
+ *
+ * C2..DF  80..BF
+ *
+ *   E0    A0..BF  80..BF
+ *   ED    80..9F  80..BF
+ *   Ex    80..BF  80..BF  where Ex is E1..EC or EE..EF (not E0, ED)
+ *
+ *   F0    90..BF  80..BF  80..BF
+ *   F4    80..8F  80..BF  80..BF  note 8F as the upper bound of the second byte
+ * F1..F3  80..BF  80..BF  80..BF
+ *
+ * (X..Y denotes range from X to Y inclusive. X and Y are byte
+ * values written in hex.)
+ *
+ * Incomplete sequences are invalid.
+ *
+ * Range 80..BF is for trailing bytes (also called continuation
+ * bytes); none of them is a valid starting byte. Adjacent values C0
+ * and C1 look like starters of 2-byte sequences but could only encode
+ * 00..7F in overlong form, so they are not valid in UTF-8.
+ *
+ * Each of the starting bytes E0, ED, F0, F4 restricts the second
+ * byte to a sub-range of 80..BF. E0 and ED use opposite halves of
+ * that range (A0..BF and 80..9F). F0 and F4 split it in a different
+ * proportion, 48:16 (90..BF and 80..8F), also without overlap.
+ *
+ * Valid UTF-8 text cannot include bytes C0, C1, F5..FF at any position.
+ * Bytes 80..BF are invalid at a starting position.
+ * Bytes 00..7F, C2..F4 are invalid at any trailing position (actually
+ * they invalidate the previous char, while the new starting byte itself
+ * could be part of a valid char; even then the whole string is
+ * invalid for the purposes of valid_utf8()).
+ *
+ * See also  https://en.wikipedia.org/wiki/UTF-8#Codepage_layout
+ *
+ * Sequences for unallocated, unassigned, reserved (including
+ * noncharacters) code points are considered valid. See here:
+ * https://en.wikipedia.org/wiki/Universal_Character_Set_characters#Noncharacters
  */
 extern int valid_utf8(const UTF8 *source);