From c77bf2b4ebb52afecf31e3eddfc3492b91a78f6b Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Fri, 3 Jan 2025 17:11:49 +0000 Subject: [PATCH 1/5] Rust: Add a test for sensitive data. --- .../test/library-tests/sensitivedata/test.rs | 150 ++++++++++++++++++ 1 file changed, 150 insertions(+) create mode 100644 rust/ql/test/library-tests/sensitivedata/test.rs diff --git a/rust/ql/test/library-tests/sensitivedata/test.rs b/rust/ql/test/library-tests/sensitivedata/test.rs new file mode 100644 index 000000000000..e0c0c6aa50df --- /dev/null +++ b/rust/ql/test/library-tests/sensitivedata/test.rs @@ -0,0 +1,150 @@ + +fn get_string() -> String { "string".to_string() } + +fn sink(_: T) { } + +// --- tests --- + +struct MyStruct { + harmless: String, + password: String, + password_file_path: String, + password_enabled: String, +} + +impl MyStruct { + fn get_certificate(&self) -> String { return get_string() } + fn get_certificate_url(&self) -> String { return get_string() } + fn get_certificate_file(&self) -> String { return get_string() } +} + +fn get_password() -> String { get_string() } + +fn test_passwords( + password: &str, passwd: &str, my_password: &str, password_str: &str, pass_phrase: &str, + auth_key: &str, authenticationkey: &str, authKey: &str, + harmless: &str, encrypted_password: &str, password_hash: &str, + ms: &MyStruct +) { + // passwords + sink(password); // $ MISSING: sensitive=password + sink(passwd); // $ MISSING: sensitive=password + sink(my_password); // $ MISSING: sensitive=password + sink(password_str); // $ MISSING: sensitive=password + sink(pass_phrase); // $ MISSING: sensitive=password + sink(auth_key); // $ MISSING: sensitive=password + sink(authenticationkey); // $ MISSING: sensitive=password + sink(authKey); // $ MISSING: sensitive=password + + sink(ms); // $ MISSING: sensitive=password + sink(ms.password.as_str()); // $ MISSING: sensitive=password + + sink(get_password()); // $ MISSING: sensitive=password + let password2 = get_string(); + sink(password2); // $ MISSING: sensitive=password + + // not passwords + sink(harmless); + sink(encrypted_password); + sink(password_hash); + + sink(ms.harmless.as_str()); + sink(ms.password_file_path.as_str()); + sink(ms.password_enabled.as_str()); + + sink(get_string()); + let harmless2 = get_string(); + sink(harmless2); +} + +fn generate_secret_key() -> String { get_string() } +fn get_secure_key() -> String { get_string() } +fn get_private_key() -> String { get_string() } +fn get_public_key() -> String { get_string() } +fn get_secret_token() -> String { get_string() } +fn get_next_token() -> String { get_string() } + +fn test_credentials( + account_key: &str, accnt_key: &str, license_key: &str, secret_key: &str, is_secret: bool, num_accounts: i64, uid: i64, + ms: &MyStruct +) { + // credentials + sink(account_key); // $ MISSING: sensitive=secret + sink(accnt_key); // $ MISSING: sensitive=secret + sink(license_key); // $ MISSING: sensitive=secret + sink(secret_key); // $ MISSING: sensitive=secret + + sink(ms.get_certificate()); // $ MISSING: sensitive=certificate + + sink(generate_secret_key()); // $ MISSING: sensitive=secret + sink(get_secure_key()); // $ MISSING: sensitive=secret + sink(get_private_key()); // $ MISSING: sensitive=secret + sink(get_secret_token()); // $ MISSING: sensitive=secret + + // not credentials + sink(is_secret); + sink(num_accounts); + sink(uid); + + sink(ms.get_certificate_url()); + sink(ms.get_certificate_file()); + + sink(get_public_key()); + sink(get_next_token()); +} + +struct Financials { + harmless: String, + my_bank_account_number: String, + credit_card_no: String, + credit_rating: i32, + user_ccn: String +} + +struct MyPrivateInfo { + mobile_phone_num: String, + contact_email: String, + contact_e_mail_2: String, + my_ssn: String, + birthday: String, + emergency_contact: String, + name_of_employer: String, + + medical_notes: Vec, + latitude: f64, + longitude: Option, + + financials: Financials +} + +fn test_private_info( + info: &MyPrivateInfo +) { + // private info + sink(info.mobile_phone_num.as_str()); // $ MISSING: sensitive=private + sink(info.mobile_phone_num.to_string()); // $ MISSING: sensitive=private + sink(info.contact_email.as_str()); // $ MISSING: sensitive=private + sink(info.contact_e_mail_2.as_str()); // $ MISSING: sensitive=private + sink(info.my_ssn.as_str()); // $ MISSING: sensitive=private + sink(info.birthday.as_str()); // $ MISSING: sensitive=private + sink(info.emergency_contact.as_str()); // $ MISSING: sensitive=private + sink(info.name_of_employer.as_str()); // $ MISSING: sensitive=private + + sink(&info.medical_notes); // $ MISSING: sensitive=private + sink(info.medical_notes[0].as_str()); // $ MISSING: sensitive=private + for n in info.medical_notes.iter() { + sink(n.as_str()); // $ MISSING: sensitive=private + } + + sink(info.latitude); // $ MISSING: sensitive=private + let x = info.longitude.unwrap(); + sink(x); // $ MISSING: sensitive=private + + sink(info.financials.my_bank_account_number.as_str()); // $ MISSING: sensitive=private + sink(info.financials.credit_card_no.as_str()); // $ MISSING: sensitive=private + sink(info.financials.credit_rating); // $ MISSING: sensitive=private + sink(info.financials.user_ccn.as_str()); // $ MISSING: sensitive=private + + // not private info + sink(info.financials.harmless.as_str()); +} From 821eb4f3e648d79741aae304a7bd1d73f419bfe8 Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Fri, 3 Jan 2025 17:21:57 +0000 Subject: [PATCH 2/5] Rust: Add sensitive data library. --- config/identical-files.json | 3 +- .../codeql/rust/security/SensitiveData.qll | 86 ++++++++ .../internal/SensitiveDataHeuristics.qll | 188 ++++++++++++++++++ .../sensitivedata/SensitiveData.expected | 0 .../sensitivedata/SensitiveData.ql | 36 ++++ .../test/library-tests/sensitivedata/test.rs | 36 ++-- 6 files changed, 330 insertions(+), 19 deletions(-) create mode 100644 rust/ql/lib/codeql/rust/security/SensitiveData.qll create mode 100644 rust/ql/lib/codeql/rust/security/internal/SensitiveDataHeuristics.qll create mode 100644 rust/ql/test/library-tests/sensitivedata/SensitiveData.expected create mode 100644 rust/ql/test/library-tests/sensitivedata/SensitiveData.ql diff --git a/config/identical-files.json b/config/identical-files.json index b066c443d9fd..56aac5604734 100644 --- a/config/identical-files.json +++ b/config/identical-files.json @@ -247,7 +247,8 @@ "javascript/ql/lib/semmle/javascript/security/internal/SensitiveDataHeuristics.qll", "python/ql/lib/semmle/python/security/internal/SensitiveDataHeuristics.qll", "ruby/ql/lib/codeql/ruby/security/internal/SensitiveDataHeuristics.qll", - "swift/ql/lib/codeql/swift/security/internal/SensitiveDataHeuristics.qll" + "swift/ql/lib/codeql/swift/security/internal/SensitiveDataHeuristics.qll", + "rust/ql/lib/codeql/rust/security/internal/SensitiveDataHeuristics.qll" ], "IncompleteUrlSubstringSanitization": [ "javascript/ql/src/Security/CWE-020/IncompleteUrlSubstringSanitization.qll", diff --git a/rust/ql/lib/codeql/rust/security/SensitiveData.qll b/rust/ql/lib/codeql/rust/security/SensitiveData.qll new file mode 100644 index 000000000000..a8164e3ab7cb --- /dev/null +++ b/rust/ql/lib/codeql/rust/security/SensitiveData.qll @@ -0,0 +1,86 @@ +/** + * Provides classes and predicates for identifying sensitive data. + * + * 'Sensitive' data is anything that should not be sent in unencrypted form. This library tries to + * guess where sensitive data may either be stored in a variable or produced by a method. + */ + +import rust +private import internal.SensitiveDataHeuristics +private import codeql.rust.dataflow.DataFlow + +/** + * A data flow node that might contain sensitive data. + */ +cached +abstract class SensitiveData extends DataFlow::Node { + /** + * Gets a classification of the kind of sensitive data this expression might contain. + */ + cached + abstract SensitiveDataClassification getClassification(); +} + +/** + * A function that might produce sensitive data. + */ +private class SensitiveDataFunction extends Function { + SensitiveDataClassification classification; + + SensitiveDataFunction() { + HeuristicNames::nameIndicatesSensitiveData(this.getName().getText(), classification) + } + + SensitiveDataClassification getClassification() { result = classification } +} + +/** + * A function call that might produce sensitive data. + */ +private class SensitiveDataCall extends SensitiveData { + SensitiveDataClassification classification; + + SensitiveDataCall() { + classification = + this.asExpr() + .getAstNode() + .(CallExprBase) + .getStaticTarget() + .(SensitiveDataFunction) + .getClassification() + } + + override SensitiveDataClassification getClassification() { result = classification } +} + +/** + * A variable that might contain sensitive data. + */ +private class SensitiveDataVariable extends Variable { + SensitiveDataClassification classification; + + SensitiveDataVariable() { + HeuristicNames::nameIndicatesSensitiveData(this.getName(), classification) + } + + SensitiveDataClassification getClassification() { result = classification } +} + +/** + * A variable access that might produce sensitive data. + */ +private class SensitiveVariableAccess extends SensitiveData { + SensitiveDataClassification classification; + + SensitiveVariableAccess() { + classification = + this.asExpr() + .getAstNode() + .(VariableAccess) + .getVariable() + .(SensitiveDataVariable) + .getClassification() + } + + override SensitiveDataClassification getClassification() { result = classification } +} diff --git a/rust/ql/lib/codeql/rust/security/internal/SensitiveDataHeuristics.qll b/rust/ql/lib/codeql/rust/security/internal/SensitiveDataHeuristics.qll new file mode 100644 index 000000000000..eb8a0c1fe756 --- /dev/null +++ b/rust/ql/lib/codeql/rust/security/internal/SensitiveDataHeuristics.qll @@ -0,0 +1,188 @@ +/** + * INTERNAL: Do not use. + * + * Provides classes and predicates for identifying strings that may indicate the presence of sensitive data. + * Such that we can share this logic across our CodeQL analysis of different languages. + * + * 'Sensitive' data in general is anything that should not be sent around in unencrypted form. + */ + +/** + * A classification of different kinds of sensitive data: + * + * - secret: generic secret or trusted data; + * - id: a user name or other account information; + * - password: a password or authorization key; + * - certificate: a certificate. + * - private: private data such as credit card numbers + * + * While classifications are represented as strings, this should not be relied upon. + * Instead, use the predicates in `SensitiveDataClassification::` to work with + * classifications. + */ +class SensitiveDataClassification extends string { + SensitiveDataClassification() { this in ["secret", "id", "password", "certificate", "private"] } +} + +/** + * Provides predicates to select the different kinds of sensitive data we support. + */ +module SensitiveDataClassification { + /** Gets the classification for secret or trusted data. */ + SensitiveDataClassification secret() { result = "secret" } + + /** Gets the classification for user names or other account information. */ + SensitiveDataClassification id() { result = "id" } + + /** Gets the classification for passwords or authorization keys. */ + SensitiveDataClassification password() { result = "password" } + + /** Gets the classification for certificates. */ + SensitiveDataClassification certificate() { result = "certificate" } + + /** Gets the classification for private data. */ + SensitiveDataClassification private() { result = "private" } +} + +/** + * INTERNAL: Do not use. + * + * Provides heuristics for identifying names related to sensitive information. + */ +module HeuristicNames { + /** + * Gets a regular expression that identifies strings that may indicate the presence of secret + * or trusted data. + */ + string maybeSecret() { result = "(?is).*((?; + +module SensitiveDataTest implements TestSig { + string getARelevantTag() { result = "sensitive" } + + predicate hasActualResult(Location location, string element, string tag, string value) { + exists(DataFlow::Node source, DataFlow::Node sink | + SensitiveDataFlow::flow(source, sink) and + location = sink.getLocation() and + element = sink.toString() and + tag = "sensitive" and + value = source.(SensitiveData).getClassification() + ) + } +} + +import MakeTest diff --git a/rust/ql/test/library-tests/sensitivedata/test.rs b/rust/ql/test/library-tests/sensitivedata/test.rs index e0c0c6aa50df..858526642c61 100644 --- a/rust/ql/test/library-tests/sensitivedata/test.rs +++ b/rust/ql/test/library-tests/sensitivedata/test.rs @@ -27,21 +27,21 @@ fn test_passwords( ms: &MyStruct ) { // passwords - sink(password); // $ MISSING: sensitive=password - sink(passwd); // $ MISSING: sensitive=password - sink(my_password); // $ MISSING: sensitive=password - sink(password_str); // $ MISSING: sensitive=password + sink(password); // $ sensitive=password + sink(passwd); // $ sensitive=password + sink(my_password); // $ sensitive=password + sink(password_str); // $ sensitive=password sink(pass_phrase); // $ MISSING: sensitive=password sink(auth_key); // $ MISSING: sensitive=password - sink(authenticationkey); // $ MISSING: sensitive=password - sink(authKey); // $ MISSING: sensitive=password + sink(authenticationkey); // $ sensitive=password + sink(authKey); // $ sensitive=password sink(ms); // $ MISSING: sensitive=password sink(ms.password.as_str()); // $ MISSING: sensitive=password - sink(get_password()); // $ MISSING: sensitive=password + sink(get_password()); // $ sensitive=password let password2 = get_string(); - sink(password2); // $ MISSING: sensitive=password + sink(password2); // $ sensitive=password // not passwords sink(harmless); @@ -69,25 +69,25 @@ fn test_credentials( ms: &MyStruct ) { // credentials - sink(account_key); // $ MISSING: sensitive=secret - sink(accnt_key); // $ MISSING: sensitive=secret + sink(account_key); // $ sensitive=id + sink(accnt_key); // $ sensitive=id sink(license_key); // $ MISSING: sensitive=secret - sink(secret_key); // $ MISSING: sensitive=secret + sink(secret_key); // $ sensitive=secret - sink(ms.get_certificate()); // $ MISSING: sensitive=certificate + sink(ms.get_certificate()); // $ sensitive=certificate - sink(generate_secret_key()); // $ MISSING: sensitive=secret + sink(generate_secret_key()); // $ sensitive=secret sink(get_secure_key()); // $ MISSING: sensitive=secret sink(get_private_key()); // $ MISSING: sensitive=secret - sink(get_secret_token()); // $ MISSING: sensitive=secret + sink(get_secret_token()); // $ sensitive=secret // not credentials sink(is_secret); - sink(num_accounts); - sink(uid); + sink(num_accounts); // $ SPURIOUS: sensitive=id + sink(uid); // $ SPURIOUS: sensitive=id - sink(ms.get_certificate_url()); - sink(ms.get_certificate_file()); + sink(ms.get_certificate_url()); // $ SPURIOUS: sensitive=certificate + sink(ms.get_certificate_file()); // $ SPURIOUS: sensitive=certificate sink(get_public_key()); sink(get_next_token()); From e1e980c2e80d1e9e837b3a1a2cecdf890373f780 Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Fri, 3 Jan 2025 18:57:42 +0000 Subject: [PATCH 3/5] Rust: Add sensitive data to summary queries. --- rust/ql/src/queries/summary/SensitiveData.ql | 15 +++++++++++++++ rust/ql/src/queries/summary/SummaryStats.ql | 3 +++ .../query-tests/diagnostics/SummaryStats.expected | 1 + 3 files changed, 19 insertions(+) create mode 100644 rust/ql/src/queries/summary/SensitiveData.ql diff --git a/rust/ql/src/queries/summary/SensitiveData.ql b/rust/ql/src/queries/summary/SensitiveData.ql new file mode 100644 index 000000000000..86686ab0432e --- /dev/null +++ b/rust/ql/src/queries/summary/SensitiveData.ql @@ -0,0 +1,15 @@ +/** + * @name Sensitive Data + * @description List all sensitive data found in the database. Sensitive data is anything that + * should not be sent in unencrypted form. + * @kind problem + * @problem.severity info + * @id rust/summary/sensitive-data + * @tags summary + */ + +import rust +import codeql.rust.security.SensitiveData + +from SensitiveData d +select d, "Sensitive data (" + d.getClassification() + "): " + d.toString() diff --git a/rust/ql/src/queries/summary/SummaryStats.ql b/rust/ql/src/queries/summary/SummaryStats.ql index 2e30fde143aa..005233f87cf3 100644 --- a/rust/ql/src/queries/summary/SummaryStats.ql +++ b/rust/ql/src/queries/summary/SummaryStats.ql @@ -8,6 +8,7 @@ import rust import codeql.rust.Concepts +import codeql.rust.security.SensitiveData import codeql.rust.Diagnostics import Stats @@ -56,4 +57,6 @@ where key = "Taint sources - total" and value = count(ThreatModelSource s) or key = "Taint sources - active" and value = count(ActiveThreatModelSource s) + or + key = "Sensitive data" and value = count(SensitiveData d) select key, value order by key diff --git a/rust/ql/test/query-tests/diagnostics/SummaryStats.expected b/rust/ql/test/query-tests/diagnostics/SummaryStats.expected index a5295af6e104..c8c0fe398aaf 100644 --- a/rust/ql/test/query-tests/diagnostics/SummaryStats.expected +++ b/rust/ql/test/query-tests/diagnostics/SummaryStats.expected @@ -14,5 +14,6 @@ | Macro calls - resolved | 8 | | Macro calls - total | 9 | | Macro calls - unresolved | 1 | +| Sensitive data | 0 | | Taint sources - active | 0 | | Taint sources - total | 0 | From f93aac07c2f11182b57dcdf7cdc5bdc9cf1a8cea Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Mon, 6 Jan 2025 13:33:34 +0000 Subject: [PATCH 4/5] Rust: Correct / clarify some QLDoc. --- rust/ql/lib/codeql/rust/security/SensitiveData.qll | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rust/ql/lib/codeql/rust/security/SensitiveData.qll b/rust/ql/lib/codeql/rust/security/SensitiveData.qll index a8164e3ab7cb..ac17a3ee0752 100644 --- a/rust/ql/lib/codeql/rust/security/SensitiveData.qll +++ b/rust/ql/lib/codeql/rust/security/SensitiveData.qll @@ -15,7 +15,7 @@ private import codeql.rust.dataflow.DataFlow cached abstract class SensitiveData extends DataFlow::Node { /** - * Gets a classification of the kind of sensitive data this expression might contain. + * Gets the classification of sensitive data this expression might contain. */ cached abstract SensitiveDataClassification getClassification(); @@ -35,7 +35,7 @@ private class SensitiveDataFunction extends Function { } /** - * A function call that might produce sensitive data. + * A function call data flow node that might produce sensitive data. */ private class SensitiveDataCall extends SensitiveData { SensitiveDataClassification classification; @@ -67,7 +67,7 @@ private class SensitiveDataVariable extends Variable { } /** - * A variable access that might produce sensitive data. + * A variable access data flow node that might produce sensitive data. */ private class SensitiveVariableAccess extends SensitiveData { SensitiveDataClassification classification; From 9d178ab8d69d55b5a05d6be2c5b2c3a679d507fa Mon Sep 17 00:00:00 2001 From: Geoffrey White <40627776+geoffw0@users.noreply.github.com> Date: Mon, 6 Jan 2025 14:05:02 +0000 Subject: [PATCH 5/5] Rust: Fix the failing integration tests. --- rust/ql/integration-tests/hello-project/summary.expected | 1 + rust/ql/integration-tests/hello-workspace/summary.cargo.expected | 1 + .../hello-workspace/summary.rust-project.expected | 1 + 3 files changed, 3 insertions(+) diff --git a/rust/ql/integration-tests/hello-project/summary.expected b/rust/ql/integration-tests/hello-project/summary.expected index 5972bf15827e..07c48c7a5b75 100644 --- a/rust/ql/integration-tests/hello-project/summary.expected +++ b/rust/ql/integration-tests/hello-project/summary.expected @@ -14,5 +14,6 @@ | Macro calls - resolved | 2 | | Macro calls - total | 2 | | Macro calls - unresolved | 0 | +| Sensitive data | 0 | | Taint sources - active | 0 | | Taint sources - total | 0 | diff --git a/rust/ql/integration-tests/hello-workspace/summary.cargo.expected b/rust/ql/integration-tests/hello-workspace/summary.cargo.expected index 27545551f123..eb8f861f9358 100644 --- a/rust/ql/integration-tests/hello-workspace/summary.cargo.expected +++ b/rust/ql/integration-tests/hello-workspace/summary.cargo.expected @@ -14,5 +14,6 @@ | Macro calls - resolved | 2 | | Macro calls - total | 2 | | Macro calls - unresolved | 0 | +| Sensitive data | 0 | | Taint sources - active | 0 | | Taint sources - total | 0 | diff --git a/rust/ql/integration-tests/hello-workspace/summary.rust-project.expected b/rust/ql/integration-tests/hello-workspace/summary.rust-project.expected index 40992231f2bd..5c57c488fda0 100644 --- a/rust/ql/integration-tests/hello-workspace/summary.rust-project.expected +++ b/rust/ql/integration-tests/hello-workspace/summary.rust-project.expected @@ -14,5 +14,6 @@ | Macro calls - resolved | 2 | | Macro calls - total | 2 | | Macro calls - unresolved | 0 | +| Sensitive data | 0 | | Taint sources - active | 0 | | Taint sources - total | 0 |