diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp index a0190c30bfd28..eb40a4cd3d902 100644 --- a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp @@ -400,17 +400,14 @@ class GenericTaintChecker : public Checker { void taintUnsafeSocketProtocol(const CallEvent &Call, CheckerContext &C) const; - /// Default taint rules are initalized with the help of a CheckerContext to - /// access the names of built-in functions like memcpy. + /// The taint rules are initalized with the help of a CheckerContext to + /// access user-provided configuration. void initTaintRules(CheckerContext &C) const; - /// CallDescription currently cannot restrict matches to the global namespace - /// only, which is why multiple CallDescriptionMaps are used, as we want to - /// disambiguate global C functions from functions inside user-defined - /// namespaces. - // TODO: Remove separation to simplify matching logic once CallDescriptions - // are more expressive. - + // TODO: The two separate `CallDescriptionMap`s were introduced when + // `CallDescription` was unable to restrict matches to the global namespace + // only. This limitation no longer exists, so the following two maps should + // be unified. mutable std::optional StaticTaintRules; mutable std::optional DynamicTaintRules; }; @@ -506,7 +503,8 @@ void GenericTaintRuleParser::consumeRulesFromConfig(const Config &C, GenericTaintRule &&Rule, RulesContTy &Rules) { NamePartsTy NameParts = parseNameParts(C); - Rules.emplace_back(CallDescription(NameParts), std::move(Rule)); + Rules.emplace_back(CallDescription(CDM::Unspecified, NameParts), + std::move(Rule)); } void GenericTaintRuleParser::parseConfig(const std::string &Option, @@ -572,196 +570,236 @@ void GenericTaintChecker::initTaintRules(CheckerContext &C) const { std::vector>; using TR = GenericTaintRule; - const Builtin::Context &BI = C.getASTContext().BuiltinInfo; - RulesConstructionTy GlobalCRules{ // Sources - {{{"fdopen"}}, TR::Source({{ReturnValueIndex}})}, - {{{"fopen"}}, TR::Source({{ReturnValueIndex}})}, - {{{"freopen"}}, TR::Source({{ReturnValueIndex}})}, - {{{"getch"}}, TR::Source({{ReturnValueIndex}})}, - {{{"getchar"}}, TR::Source({{ReturnValueIndex}})}, - {{{"getchar_unlocked"}}, TR::Source({{ReturnValueIndex}})}, - {{{"gets"}}, TR::Source({{0}, ReturnValueIndex})}, - {{{"gets_s"}}, TR::Source({{0}, ReturnValueIndex})}, - {{{"scanf"}}, TR::Source({{}, 1})}, - {{{"scanf_s"}}, TR::Source({{}, {1}})}, - {{{"wgetch"}}, TR::Source({{}, ReturnValueIndex})}, + {{CDM::CLibrary, {"fdopen"}}, TR::Source({{ReturnValueIndex}})}, + {{CDM::CLibrary, {"fopen"}}, TR::Source({{ReturnValueIndex}})}, + {{CDM::CLibrary, {"freopen"}}, TR::Source({{ReturnValueIndex}})}, + {{CDM::CLibrary, {"getch"}}, TR::Source({{ReturnValueIndex}})}, + {{CDM::CLibrary, {"getchar"}}, TR::Source({{ReturnValueIndex}})}, + {{CDM::CLibrary, {"getchar_unlocked"}}, TR::Source({{ReturnValueIndex}})}, + {{CDM::CLibrary, {"gets"}}, TR::Source({{0, ReturnValueIndex}})}, + {{CDM::CLibrary, {"gets_s"}}, TR::Source({{0, ReturnValueIndex}})}, + {{CDM::CLibrary, {"scanf"}}, TR::Source({{}, 1})}, + {{CDM::CLibrary, {"scanf_s"}}, TR::Source({{}, 1})}, + {{CDM::CLibrary, {"wgetch"}}, TR::Source({{ReturnValueIndex}})}, // Sometimes the line between taint sources and propagators is blurry. // _IO_getc is choosen to be a source, but could also be a propagator. // This way it is simpler, as modeling it as a propagator would require // to model the possible sources of _IO_FILE * values, which the _IO_getc // function takes as parameters. - {{{"_IO_getc"}}, TR::Source({{ReturnValueIndex}})}, - {{{"getcwd"}}, TR::Source({{0, ReturnValueIndex}})}, - {{{"getwd"}}, TR::Source({{0, ReturnValueIndex}})}, - {{{"readlink"}}, TR::Source({{1, ReturnValueIndex}})}, - {{{"readlinkat"}}, TR::Source({{2, ReturnValueIndex}})}, - {{{"get_current_dir_name"}}, TR::Source({{ReturnValueIndex}})}, - {{{"gethostname"}}, TR::Source({{0}})}, - {{{"getnameinfo"}}, TR::Source({{2, 4}})}, - {{{"getseuserbyname"}}, TR::Source({{1, 2}})}, - {{{"getgroups"}}, TR::Source({{1, ReturnValueIndex}})}, - {{{"getlogin"}}, TR::Source({{ReturnValueIndex}})}, - {{{"getlogin_r"}}, TR::Source({{0}})}, + {{CDM::CLibrary, {"_IO_getc"}}, TR::Source({{ReturnValueIndex}})}, + {{CDM::CLibrary, {"getcwd"}}, TR::Source({{0, ReturnValueIndex}})}, + {{CDM::CLibrary, {"getwd"}}, TR::Source({{0, ReturnValueIndex}})}, + {{CDM::CLibrary, {"readlink"}}, TR::Source({{1, ReturnValueIndex}})}, + {{CDM::CLibrary, {"readlinkat"}}, TR::Source({{2, ReturnValueIndex}})}, + {{CDM::CLibrary, {"get_current_dir_name"}}, + TR::Source({{ReturnValueIndex}})}, + {{CDM::CLibrary, {"gethostname"}}, TR::Source({{0}})}, + {{CDM::CLibrary, {"getnameinfo"}}, TR::Source({{2, 4}})}, + {{CDM::CLibrary, {"getseuserbyname"}}, TR::Source({{1, 2}})}, + {{CDM::CLibrary, {"getgroups"}}, TR::Source({{1, ReturnValueIndex}})}, + {{CDM::CLibrary, {"getlogin"}}, TR::Source({{ReturnValueIndex}})}, + {{CDM::CLibrary, {"getlogin_r"}}, TR::Source({{0}})}, // Props - {{{"accept"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"atoi"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"atol"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"atoll"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"fgetc"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"fgetln"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"fgets"}}, TR::Prop({{2}}, {{0, ReturnValueIndex}})}, - {{{"fgetws"}}, TR::Prop({{2}}, {{0, ReturnValueIndex}})}, - {{{"fscanf"}}, TR::Prop({{0}}, {{}, 2})}, - {{{"fscanf_s"}}, TR::Prop({{0}}, {{}, {2}})}, - {{{"sscanf"}}, TR::Prop({{0}}, {{}, 2})}, - - {{{"getc"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"getc_unlocked"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"getdelim"}}, TR::Prop({{3}}, {{0}})}, - {{{"getline"}}, TR::Prop({{2}}, {{0}})}, - {{{"getw"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"pread"}}, TR::Prop({{0, 1, 2, 3}}, {{1, ReturnValueIndex}})}, - {{{"read"}}, TR::Prop({{0, 2}}, {{1, ReturnValueIndex}})}, - {{{"strchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"strrchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"tolower"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"toupper"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"fread"}}, TR::Prop({{3}}, {{0, ReturnValueIndex}})}, - {{{"recv"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, - {{{"recvfrom"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, - - {{{"ttyname"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"ttyname_r"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, - - {{{"basename"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"dirname"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"fnmatch"}}, TR::Prop({{1}}, {{ReturnValueIndex}})}, - {{{"memchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"memrchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"rawmemchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - - {{{"mbtowc"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})}, - {{{"wctomb"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})}, - {{{"wcwidth"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - - {{{"memcmp"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})}, - {{{"memcpy"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})}, - {{{"memmove"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})}, - // If memmem was called with a tainted needle and the search was - // successful, that would mean that the value pointed by the return value - // has the same content as the needle. If we choose to go by the policy of - // content equivalence implies taintedness equivalence, that would mean - // haystack should be considered a propagation source argument. - {{{"memmem"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - - // The comment for memmem above also applies to strstr. - {{{"strstr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"strcasestr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - - {{{"strchrnul"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - - {{{"index"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"rindex"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"accept"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"atoi"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"atol"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"atoll"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"fgetc"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"fgetln"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibraryMaybeHardened, {"fgets"}}, + TR::Prop({{2}}, {{0, ReturnValueIndex}})}, + {{CDM::CLibraryMaybeHardened, {"fgetws"}}, + TR::Prop({{2}}, {{0, ReturnValueIndex}})}, + {{CDM::CLibrary, {"fscanf"}}, TR::Prop({{0}}, {{}, 2})}, + {{CDM::CLibrary, {"fscanf_s"}}, TR::Prop({{0}}, {{}, 2})}, + {{CDM::CLibrary, {"sscanf"}}, TR::Prop({{0}}, {{}, 2})}, + {{CDM::CLibrary, {"sscanf_s"}}, TR::Prop({{0}}, {{}, 2})}, + + {{CDM::CLibrary, {"getc"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"getc_unlocked"}}, + TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"getdelim"}}, TR::Prop({{3}}, {{0}})}, + // TODO: this intends to match the C function `getline()`, but the call + // description also matches the C++ function `std::getline()`; it should + // be ruled out by some additional logic. + {{CDM::CLibrary, {"getline"}}, TR::Prop({{2}}, {{0}})}, + {{CDM::CLibrary, {"getw"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibraryMaybeHardened, {"pread"}}, + TR::Prop({{0, 1, 2, 3}}, {{1, ReturnValueIndex}})}, + {{CDM::CLibraryMaybeHardened, {"read"}}, + TR::Prop({{0, 2}}, {{1, ReturnValueIndex}})}, + {{CDM::CLibraryMaybeHardened, {"fread"}}, + TR::Prop({{3}}, {{0, ReturnValueIndex}})}, + {{CDM::CLibraryMaybeHardened, {"recv"}}, + TR::Prop({{0}}, {{1, ReturnValueIndex}})}, + {{CDM::CLibraryMaybeHardened, {"recvfrom"}}, + TR::Prop({{0}}, {{1, ReturnValueIndex}})}, + + {{CDM::CLibrary, {"ttyname"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"ttyname_r"}}, + TR::Prop({{0}}, {{1, ReturnValueIndex}})}, + + {{CDM::CLibrary, {"basename"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"dirname"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"fnmatch"}}, TR::Prop({{1}}, {{ReturnValueIndex}})}, + + {{CDM::CLibrary, {"mbtowc"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})}, + {{CDM::CLibrary, {"wctomb"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})}, + {{CDM::CLibrary, {"wcwidth"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + + {{CDM::CLibrary, {"memcmp"}}, + TR::Prop({{0, 1, 2}}, {{ReturnValueIndex}})}, + {{CDM::CLibraryMaybeHardened, {"memcpy"}}, + TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})}, + {{CDM::CLibraryMaybeHardened, {"memmove"}}, + TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})}, + {{CDM::CLibraryMaybeHardened, {"bcopy"}}, TR::Prop({{0, 2}}, {{1}})}, + + // Note: "memmem" and its variants search for a byte sequence ("needle") + // in a larger area ("haystack"). Currently we only propagate taint from + // the haystack to the result, but in theory tampering with the needle + // could also produce incorrect results. + {{CDM::CLibrary, {"memmem"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"strstr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"strcasestr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + + // Analogously, the following functions search for a byte within a buffer + // and we only propagate taint from the buffer to the result. + {{CDM::CLibraryMaybeHardened, {"memchr"}}, + TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibraryMaybeHardened, {"memrchr"}}, + TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"rawmemchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibraryMaybeHardened, {"strchr"}}, + TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibraryMaybeHardened, {"strrchr"}}, + TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibraryMaybeHardened, {"strchrnul"}}, + TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"index"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"rindex"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, // FIXME: In case of arrays, only the first element of the array gets // tainted. - {{{"qsort"}}, TR::Prop({{0}}, {{0}})}, - {{{"qsort_r"}}, TR::Prop({{0}}, {{0}})}, - - {{{"strcmp"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})}, - {{{"strcasecmp"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})}, - {{{"strncmp"}}, TR::Prop({{0, 1, 2}}, {{ReturnValueIndex}})}, - {{{"strncasecmp"}}, TR::Prop({{0, 1, 2}}, {{ReturnValueIndex}})}, - {{{"strspn"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})}, - {{{"strcspn"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})}, - {{{"strpbrk"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"strndup"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"strndupa"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"qsort"}}, TR::Prop({{0}}, {{0}})}, + {{CDM::CLibrary, {"qsort_r"}}, TR::Prop({{0}}, {{0}})}, + + {{CDM::CLibrary, {"strcmp"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"strcasecmp"}}, + TR::Prop({{0, 1}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"strncmp"}}, + TR::Prop({{0, 1, 2}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"strncasecmp"}}, + TR::Prop({{0, 1, 2}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"strspn"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"strcspn"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"strpbrk"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + + {{CDM::CLibrary, {"strndup"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"strndupa"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"strdup"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"strdupa"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"wcsdup"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, // strlen, wcslen, strnlen and alike intentionally don't propagate taint. // See the details here: https://github.com/llvm/llvm-project/pull/66086 - {{{"strtol"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, - {{{"strtoll"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, - {{{"strtoul"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, - {{{"strtoull"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, - - {{{"isalnum"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"isalpha"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"isascii"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"isblank"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"iscntrl"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"isdigit"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"isgraph"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"islower"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"isprint"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"ispunct"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"isspace"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"isupper"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{{"isxdigit"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - - {{CDM::CLibraryMaybeHardened, {BI.getName(Builtin::BIstrncat)}}, - TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})}, - {{CDM::CLibraryMaybeHardened, {BI.getName(Builtin::BIstrlcpy)}}, - TR::Prop({{1, 2}}, {{0}})}, - {{CDM::CLibraryMaybeHardened, {BI.getName(Builtin::BIstrlcat)}}, - TR::Prop({{1, 2}}, {{0}})}, - {{CDM::CLibraryMaybeHardened, {{"snprintf"}}}, - TR::Prop({{1}, 3}, {{0, ReturnValueIndex}})}, - {{CDM::CLibraryMaybeHardened, {{"sprintf"}}}, - TR::Prop({{1}, 2}, {{0, ReturnValueIndex}})}, - {{CDM::CLibraryMaybeHardened, {{"strcpy"}}}, + {{CDM::CLibrary, {"strtol"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, + {{CDM::CLibrary, {"strtoll"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, + {{CDM::CLibrary, {"strtoul"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, + {{CDM::CLibrary, {"strtoull"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, + + {{CDM::CLibrary, {"tolower"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"toupper"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + + {{CDM::CLibrary, {"isalnum"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"isalpha"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"isascii"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"isblank"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"iscntrl"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"isdigit"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"isgraph"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"islower"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"isprint"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"ispunct"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"isspace"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"isupper"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDM::CLibrary, {"isxdigit"}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + + {{CDM::CLibraryMaybeHardened, {"strcpy"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})}, - {{CDM::CLibraryMaybeHardened, {{"stpcpy"}}}, + {{CDM::CLibraryMaybeHardened, {"stpcpy"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})}, - {{CDM::CLibraryMaybeHardened, {{"strcat"}}}, + {{CDM::CLibraryMaybeHardened, {"strcat"}}, TR::Prop({{0, 1}}, {{0, ReturnValueIndex}})}, - {{CDM::CLibraryMaybeHardened, {{"wcsncat"}}}, + {{CDM::CLibraryMaybeHardened, {"wcsncat"}}, TR::Prop({{0, 1}}, {{0, ReturnValueIndex}})}, - {{CDM::CLibrary, {{"strdup"}}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{CDM::CLibrary, {{"strdupa"}}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{CDM::CLibrary, {{"wcsdup"}}}, TR::Prop({{0}}, {{ReturnValueIndex}})}, - {{CDM::CLibrary, BI.getName(Builtin::BImemcpy)}, - TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})}, - {{CDM::CLibrary, {BI.getName(Builtin::BImemmove)}}, + {{CDM::CLibraryMaybeHardened, {"strncpy"}}, TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})}, - {{CDM::CLibrary, {BI.getName(Builtin::BIstrncpy)}}, - TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})}, - {{CDM::CLibrary, {BI.getName(Builtin::BIstrndup)}}, - TR::Prop({{0, 1}}, {{ReturnValueIndex}})}, - {{CDM::CLibrary, {"bcopy"}}, TR::Prop({{0, 2}}, {{1}})}, + {{CDM::CLibraryMaybeHardened, {"strncat"}}, + TR::Prop({{0, 1, 2}}, {{0, ReturnValueIndex}})}, + {{CDM::CLibraryMaybeHardened, {"strlcpy"}}, TR::Prop({{1, 2}}, {{0}})}, + {{CDM::CLibraryMaybeHardened, {"strlcat"}}, TR::Prop({{0, 1, 2}}, {{0}})}, + + // Usually the matching mode `CDM::CLibraryMaybeHardened` is sufficient + // for unified handling of a function `FOO()` and its hardened variant + // `__FOO_chk()`, but in the "sprintf" family the extra parameters of the + // hardened variants are inserted into the middle of the parameter list, + // so that would not work in their case. + // int snprintf(char * str, size_t maxlen, const char * format, ...); + {{CDM::CLibrary, {"snprintf"}}, + TR::Prop({{1, 2}, 3}, {{0, ReturnValueIndex}})}, + // int sprintf(char * str, const char * format, ...); + {{CDM::CLibrary, {"sprintf"}}, + TR::Prop({{1}, 2}, {{0, ReturnValueIndex}})}, + // int __snprintf_chk(char * str, size_t maxlen, int flag, size_t strlen, + // const char * format, ...); + {{CDM::CLibrary, {"__snprintf_chk"}}, + TR::Prop({{1, 4}, 5}, {{0, ReturnValueIndex}})}, + // int __sprintf_chk(char * str, int flag, size_t strlen, const char * + // format, ...); + {{CDM::CLibrary, {"__sprintf_chk"}}, + TR::Prop({{3}, 4}, {{0, ReturnValueIndex}})}, // Sinks - {{{"system"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, - {{{"popen"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, - {{{"execl"}}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)}, - {{{"execle"}}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)}, - {{{"execlp"}}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)}, - {{{"execv"}}, TR::Sink({{0, 1}}, MsgSanitizeSystemArgs)}, - {{{"execve"}}, TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)}, - {{{"fexecve"}}, TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)}, - {{{"execvp"}}, TR::Sink({{0, 1}}, MsgSanitizeSystemArgs)}, - {{{"execvpe"}}, TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)}, - {{{"dlopen"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, + {{CDM::CLibrary, {"system"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, + {{CDM::CLibrary, {"popen"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, + {{CDM::CLibrary, {"execl"}}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)}, + {{CDM::CLibrary, {"execle"}}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)}, + {{CDM::CLibrary, {"execlp"}}, TR::Sink({{}, {0}}, MsgSanitizeSystemArgs)}, + {{CDM::CLibrary, {"execv"}}, TR::Sink({{0, 1}}, MsgSanitizeSystemArgs)}, + {{CDM::CLibrary, {"execve"}}, + TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)}, + {{CDM::CLibrary, {"fexecve"}}, + TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)}, + {{CDM::CLibrary, {"execvp"}}, TR::Sink({{0, 1}}, MsgSanitizeSystemArgs)}, + {{CDM::CLibrary, {"execvpe"}}, + TR::Sink({{0, 1, 2}}, MsgSanitizeSystemArgs)}, + {{CDM::CLibrary, {"dlopen"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)}, // malloc, calloc, alloca, realloc, memccpy // are intentionally not marked as taint sinks because unconditional // reporting for these functions generates many false positives. // These taint sinks should be implemented in other checkers with more // sophisticated sanitation heuristics. - {{{{"setproctitle"}}}, TR::Sink({{0}, 1}, MsgUncontrolledFormatString)}, - {{{{"setproctitle_fast"}}}, + + {{CDM::CLibrary, {"setproctitle"}}, + TR::Sink({{0}, 1}, MsgUncontrolledFormatString)}, + {{CDM::CLibrary, {"setproctitle_fast"}}, TR::Sink({{0}, 1}, MsgUncontrolledFormatString)}}; - // `getenv` returns taint only in untrusted environments. if (TR::UntrustedEnv(C)) { // void setproctitle_init(int argc, char *argv[], char *envp[]) + // TODO: replace `MsgCustomSink` with a message that fits this situation. + GlobalCRules.push_back({{CDM::CLibrary, {"setproctitle_init"}}, + TR::Sink({{1, 2}}, MsgCustomSink)}); + + // `getenv` returns taint only in untrusted environments. GlobalCRules.push_back( - {{{"setproctitle_init"}}, TR::Sink({{1, 2}}, MsgCustomSink)}); - GlobalCRules.push_back({{{"getenv"}}, TR::Source({{ReturnValueIndex}})}); + {{CDM::CLibrary, {"getenv"}}, TR::Source({{ReturnValueIndex}})}); } StaticTaintRules.emplace(std::make_move_iterator(GlobalCRules.begin()),