// Build script that generates `interned_strings.rs` inside Cargo's `OUT_DIR`.
//
// The generated file holds two items that the library pulls in with `include!`:
//   * `impl StrId { pub const NAME: StrId = StrId(i); ... }` - one constant per
//     entry of `STRINGS`, where `i` is the entry's position in the table;
//   * `impl Default for Interner`, which interns every entry in table order.

use std::collections::HashSet;
use std::env;
use std::fs::File;
use std::io::{BufWriter, Error, ErrorKind, Result, Write};
use std::path::Path;

/// Strings that are interned up front.
///
/// Order matters: the index of an entry becomes the numeric value of its
/// `StrId` constant, so new entries should be appended rather than inserted.
///
/// Entries 1-3 keep their angle brackets and spaces on purpose:
/// `format_identifier` strips `<`/`>` and turns spaces into `_`, yielding
/// `ANONYMOUS_FUNCTION`, `ARIA_ATTRIBUTE` and `DATA_ATTRIBUTE`.
const STRINGS: &[&str] = &[
    "",
    "<anonymous function>",
    "<aria attribute>",
    "<data attribute>",
    "Codegen",
    "DOMDocument",
    "DateTime",
    "DateTimeImmutable",
    "HH\\AnyArray",
    "HH\\Asio\\join",
    "HH\\AsyncIterator",
    "HH\\AsyncKeyedIterator",
    "HH\\Awaitable",
    "HH\\BuiltinEnum",
    "HH\\BuiltinEnumClass",
    "HH\\Container",
    "HH\\EnumClass\\Label",
    "HH\\FormatString",
    "HH\\Iterator",
    "HH\\KeyedContainer",
    "HH\\KeyedIterator",
    "HH\\KeyedTraversable",
    "HH\\Lib\\C\\contains",
    "HH\\Lib\\C\\contains_key",
    "HH\\Lib\\C\\firstx",
    "HH\\Lib\\C\\lastx",
    "HH\\Lib\\C\\onlyx",
    "HH\\Lib\\Dict\\contains",
    "HH\\Lib\\Dict\\contains_key",
    "HH\\Lib\\Math\\INT32_MAX",
    "HH\\Lib\\Regex\\Pattern",
    "HH\\Lib\\Regex\\matches",
    "HH\\Lib\\Str\\format",
    "HH\\Lib\\Str\\join",
    "HH\\Lib\\Str\\replace",
    "HH\\Lib\\Str\\slice",
    "HH\\Lib\\Str\\split",
    "HH\\Lib\\Str\\starts_with",
    "HH\\Lib\\Str\\strip_suffix",
    "HH\\Lib\\Str\\trim",
    "HH\\MemberOf",
    "HH\\Shapes",
    "HH\\Traversable",
    "HH\\TypeStructure",
    "HH\\Vector",
    "HH\\global_get",
    "HH\\idx",
    "HH\\invariant",
    "HH\\invariant_violation",
    "HH\\set_frame_metadata",
    "HH\\type_structure",
    "Hakana\\FindPaths\\Sanitize",
    "Hakana\\MustUse",
    "Hakana\\SecurityAnalysis\\IgnorePath",
    "Hakana\\SecurityAnalysis\\IgnorePathIfTrue",
    "Hakana\\SecurityAnalysis\\Sanitize",
    "Hakana\\SecurityAnalysis\\ShapeSource",
    "Hakana\\SecurityAnalysis\\Source",
    "Hakana\\SecurityAnalysis\\SpecializeCall",
    "Hakana\\SpecialTypes\\LiteralString",
    "NumberFormatter",
    "SimpleXMLElement",
    "XHPChild",
    "__DIR__",
    "__DynamicallyCallable",
    "__EntryPoint",
    "__FILE__",
    "__FUNCTION__",
    "__PHP_Incomplete_Class",
    "__construct",
    "assert",
    "assertAll",
    "at",
    "class_exists",
    "coerce",
    "debug_backtrace",
    "dirname",
    "echo",
    "fromItems",
    "function_exists",
    "include",
    "isset",
    "keyExists",
    "microtime",
    "parent",
    "preg_replace",
    "preg_split",
    "range",
    "removeKey",
    "self",
    "static",
    "stdClass",
    "str_replace",
    "this",
    "toArray",
    "toDict",
    "trigger_error",
    "unset",
    "base64_decode",
    "basename",
    "date",
    "date_format",
    "file_get_contents",
    "hash_equals",
    "hash_hmac",
    "hex2bin",
    "idx",
    "in_array",
    "json_encode",
    "ltrim",
    "mb_strlen",
    "mb_strtolower",
    "mb_strtoupper",
    "md5",
    "mktime",
    "password_hash",
    "rand",
    "realpath",
    "rtrim",
    "sha1",
    "str_repeat",
    "strpad",
    "strtolower",
    "strtotime",
    "strtoupper",
    "trim",
    "utf8_encode",
    "vsprintf",
];

/// Writes `$OUT_DIR/interned_strings.rs`.
///
/// # Errors
///
/// Returns an error when `OUT_DIR` is not set, when the output file cannot be
/// created or written, or when the table produces an unusable or duplicate
/// constant name.
fn main() -> Result<()> {
    let out_dir = env::var_os("OUT_DIR").ok_or_else(|| {
        Error::new(
            ErrorKind::NotFound,
            "OUT_DIR is not set; this program must be run by Cargo as a build script",
        )
    })?;
    let dest_path = Path::new(&out_dir).join("interned_strings.rs");

    // Buffer the many small `writeln!` calls, and flush explicitly so a write
    // error is reported instead of being swallowed when the writer is dropped.
    let mut out = BufWriter::new(File::create(dest_path)?);
    write_interned_strings(&mut out, STRINGS)?;
    out.flush()
}

/// Emits the `impl StrId` constants and the `Default for Interner` impl for
/// `strings` into `out`.
fn write_interned_strings(out: &mut impl Write, strings: &[&str]) -> Result<()> {
    // Validate everything first so a bad table never leaves half a file behind
    // the error message.
    let const_names = checked_const_names(strings)?;

    writeln!(out, "impl StrId {{")?;
    for (id, const_name) in const_names.iter().enumerate() {
        writeln!(out, "    pub const {}: StrId = StrId({});", const_name, id)?;
    }
    writeln!(out, "}}")?;

    writeln!(out, "impl Default for Interner {{")?;
    writeln!(out, "    fn default() -> Self {{")?;
    writeln!(
        out,
        "        let mut interner = Interner {{ map: IndexSet::default() }};"
    )?;
    for value in strings {
        // `{:?}` renders a valid Rust string literal: it escapes backslashes,
        // double quotes and control characters, not only backslashes.
        writeln!(out, "        interner.intern({:?}.to_string());", value)?;
    }
    writeln!(out, "        interner")?;
    writeln!(out, "    }}")?;
    writeln!(out, "}}")?;

    Ok(())
}

/// Derives the constant name for every string and rejects tables that cannot
/// produce a compilable `impl StrId` block.
fn checked_const_names(strings: &[&str]) -> Result<Vec<String>> {
    let names: Vec<String> = strings.iter().map(|value| format_identifier(value)).collect();

    let mut seen = HashSet::with_capacity(names.len());
    for (value, name) in strings.iter().zip(&names) {
        if !is_valid_const_name(name) {
            return Err(Error::new(
                ErrorKind::InvalidData,
                format!(
                    "interned string {:?} maps to {:?}, which is not a usable constant name",
                    value, name
                ),
            ));
        }
        // Two constants with one name cannot compile. Equal strings always map
        // to equal names, so this also rejects a string listed twice.
        if !seen.insert(name.as_str()) {
            return Err(Error::new(
                ErrorKind::InvalidData,
                format!(
                    "interned string {:?} maps to constant name {}, which is already taken",
                    value, name
                ),
            ));
        }
    }

    Ok(names)
}

/// Conservative identifier check: a letter or `_` first, then letters, digits
/// or `_`, and not the lone `_` placeholder.
fn is_valid_const_name(name: &str) -> bool {
    let mut chars = name.chars();
    let starts_ok = matches!(chars.next(), Some(c) if c == '_' || c.is_alphabetic());
    starts_ok && name != "_" && chars.all(|c| c == '_' || c.is_alphanumeric())
}

/// Converts an interned string into the `SCREAMING_SNAKE_CASE` name of its
/// `StrId` constant.
///
/// Rules, applied in order:
/// 1. `""` becomes `EMPTY`; `HH\type_structure` and `HH\idx` get the fixed names
///    `TYPE_STRUCTURE_FN` and `IDX_FN` so they do not collide with
///    `HH\TypeStructure` and plain `idx`.
/// 2. `__NAME__` becomes `NAME_CONST` (the inner text is kept as written).
/// 3. Otherwise leading `HH\` and then leading `__` are dropped, `\` and spaces
///    become `_`, `<`/`>` are removed, an underscore is inserted wherever an
///    uppercase letter follows a lowercase one, and the result is uppercased.
///
/// Inputs that start and end with `__` but are shorter than four characters
/// (`"__"`, `"___"`) fall through to rule 3 instead of panicking on an inverted
/// slice range.
fn format_identifier(input: &str) -> String {
    match input {
        "" => return "EMPTY".to_string(),
        "HH\\type_structure" => return "TYPE_STRUCTURE_FN".to_string(),
        "HH\\idx" => return "IDX_FN".to_string(),
        _ => {}
    }

    // `strip_suffix` runs on what is left after the prefix, so the two
    // delimiters can never overlap.
    if let Some(inner) = input
        .strip_prefix("__")
        .and_then(|rest| rest.strip_suffix("__"))
    {
        return format!("{}_CONST", inner);
    }

    let trimmed = input
        .trim_start_matches("HH\\")
        .trim_start_matches("__");

    let mut result = String::with_capacity(trimmed.len() + 8);
    let mut was_lower = false;

    for ch in trimmed.chars() {
        match ch {
            // Dropped entirely; neighbours are treated as adjacent.
            '<' | '>' => {}
            // Namespace separators and spaces become word separators.
            '\\' | ' ' => {
                result.push('_');
                was_lower = false;
            }
            _ => {
                // camelCase boundary: lowercase followed by uppercase.
                if was_lower && ch.is_uppercase() {
                    result.push('_');
                }
                result.extend(ch.to_uppercase());
                was_lower = ch.is_lowercase();
            }
        }
    }

    // Acronym runs carry no lower-to-upper boundary, so patch the two known
    // names by hand.
    result
        .replace("XHPCHILD", "XHP_CHILD")
        .replace("SIMPLE_XMLELEMENT", "SIMPLE_XML_ELEMENT")
}
{ - EMPTY, 0 => "", - ANONYMOUS_FUNCTION, 1 => "", - ARIA_ATTRIBUTE, 2 => "", - DATA_ATTRIBUTE, 3 => "", - CODEGEN, 4 => "Codegen", - DOMDOCUMENT, 5 => "DOMDocument", - DATE_TIME, 6 => "DateTime", - DATE_TIME_IMMUTABLE, 7 => "DateTimeImmutable", - ANY_ARRAY, 8 => "HH\\AnyArray", - ASIO_JOIN, 9 => "HH\\Asio\\join", - ASYNC_ITERATOR, 10 => "HH\\AsyncIterator", - ASYNC_KEYED_ITERATOR, 11 => "HH\\AsyncKeyedIterator", - AWAITABLE, 12 => "HH\\Awaitable", - BUILTIN_ENUM, 13 => "HH\\BuiltinEnum", - BUILTIN_ENUM_CLASS, 14 => "HH\\BuiltinEnumClass", - CONTAINER, 15 => "HH\\Container", - ENUM_CLASS_LABEL, 16 => "HH\\EnumClass\\Label", - FORMAT_STRING, 17 => "HH\\FormatString", - ITERATOR, 18 => "HH\\Iterator", - KEYED_CONTAINER, 19 => "HH\\KeyedContainer", - KEYED_ITERATOR, 20 => "HH\\KeyedIterator", - KEYED_TRAVERSABLE, 21 => "HH\\KeyedTraversable", - LIB_C_CONTAINS, 22 => "HH\\Lib\\C\\contains", - LIB_C_CONTAINS_KEY, 23 => "HH\\Lib\\C\\contains_key", - LIB_C_FIRSTX, 24 => "HH\\Lib\\C\\firstx", - LIB_C_LASTX, 25 => "HH\\Lib\\C\\lastx", - LIB_C_ONLYX, 26 => "HH\\Lib\\C\\onlyx", - LIB_DICT_CONTAINS, 27 => "HH\\Lib\\Dict\\contains", - LIB_DICT_CONTAINS_KEY, 28 => "HH\\Lib\\Dict\\contains_key", - LIB_MATH_INT32_MAX, 29 => "HH\\Lib\\Math\\INT32_MAX", - LIB_REGEX_PATTERN, 30 => "HH\\Lib\\Regex\\Pattern", - LIB_REGEX_MATCHES, 31 => "HH\\Lib\\Regex\\matches", - LIB_STR_FORMAT, 32 => "HH\\Lib\\Str\\format", - LIB_STR_JOIN, 33 => "HH\\Lib\\Str\\join", - LIB_STR_REPLACE, 34 => "HH\\Lib\\Str\\replace", - LIB_STR_SLICE, 35 => "HH\\Lib\\Str\\slice", - LIB_STR_SPLIT, 36 => "HH\\Lib\\Str\\split", - LIB_STR_STARTS_WITH, 37 => "HH\\Lib\\Str\\starts_with", - LIB_STR_STRIP_SUFFIX, 38 => "HH\\Lib\\Str\\strip_suffix", - LIB_STR_TRIM, 39 => "HH\\Lib\\Str\\trim", - MEMBER_OF, 40 => "HH\\MemberOf", - SHAPES, 41 => "HH\\Shapes", - TRAVERSABLE, 42 => "HH\\Traversable", - TYPE_STRUCTURE, 43 => "HH\\TypeStructure", - VECTOR, 44 => "HH\\Vector", - GLOBAL_GET, 45 => "HH\\global_get", - IDX_FN, 46 => 
"HH\\idx", - INVARIANT, 47 => "HH\\invariant", - INVARIANT_VIOLATION, 48 => "HH\\invariant_violation", - SET_FRAME_METADATA, 49 => "HH\\set_frame_metadata", - TYPE_STRUCTURE_FN, 50 => "HH\\type_structure", - HAKANA_FIND_PATHS_SANITIZE, 51 => "Hakana\\FindPaths\\Sanitize", - HAKANA_MUST_USE, 52 => "Hakana\\MustUse", - HAKANA_SECURITY_ANALYSIS_IGNORE_PATH, 53 => "Hakana\\SecurityAnalysis\\IgnorePath", - HAKANA_SECURITY_ANALYSIS_IGNORE_PATH_IF_TRUE, 54 => "Hakana\\SecurityAnalysis\\IgnorePathIfTrue", - HAKANA_SECURITY_ANALYSIS_SANITIZE, 55 => "Hakana\\SecurityAnalysis\\Sanitize", - HAKANA_SECURITY_ANALYSIS_SHAPE_SOURCE, 56 => "Hakana\\SecurityAnalysis\\ShapeSource", - HAKANA_SECURITY_ANALYSIS_SOURCE, 57 => "Hakana\\SecurityAnalysis\\Source", - HAKANA_SECURITY_ANALYSIS_SPECIALIZE_CALL, 58 => "Hakana\\SecurityAnalysis\\SpecializeCall", - HAKANA_SPECIAL_TYPES_LITERAL_STRING, 59 => "Hakana\\SpecialTypes\\LiteralString", - NUMBER_FORMATTER, 60 => "NumberFormatter", - SIMPLE_XML_ELEMENT, 61 => "SimpleXMLElement", - XHP_CHILD, 62 => "XHPChild", - DIR_CONST, 63 => "__DIR__", - DYNAMICALLY_CALLABLE, 64 => "__DynamicallyCallable", - ENTRY_POINT, 65 => "__EntryPoint", - FILE_CONST, 66 => "__FILE__", - FUNCTION_CONST, 67 => "__FUNCTION__", - PHP_INCOMPLETE_CLASS, 68 => "__PHP_Incomplete_Class", - CONSTRUCT, 69 => "__construct", - ASSERT, 70 => "assert", - ASSERT_ALL, 71 => "assertAll", - AT, 72 => "at", - CLASS_EXISTS, 73 => "class_exists", - COERCE, 74 => "coerce", - DEBUG_BACKTRACE, 75 => "debug_backtrace", - DIRNAME, 76 => "dirname", - ECHO, 77 => "echo", - FROM_ITEMS, 78 => "fromItems", - FUNCTION_EXISTS, 79 => "function_exists", - INCLUDE, 80 => "include", - ISSET, 81 => "isset", - KEY_EXISTS, 82 => "keyExists", - MICROTIME, 83 => "microtime", - PARENT, 84 => "parent", - PREG_REPLACE, 85 => "preg_replace", - PREG_SPLIT, 86 => "preg_split", - RANGE, 87 => "range", - REMOVE_KEY, 88 => "removeKey", - SELF, 89 => "self", - STATIC, 90 => "static", - STD_CLASS, 91 => "stdClass", - 
STR_REPLACE, 92 => "str_replace", - THIS, 93 => "this", - TO_ARRAY, 94 => "toArray", - TO_DICT, 95 => "toDict", - TRIGGER_ERROR, 96 => "trigger_error", - UNSET, 97 => "unset", - BASE64_DECODE, 98 => "base64_decode", - BASENAME, 99 => "basename", - DATE, 100 => "date", - DATE_FORMAT, 101 => "date_format", - FILE_GET_CONTENTS, 102 => "file_get_contents", - HASH_EQUALS, 103 => "hash_equals", - HASH_HMAC, 104 => "hash_hmac", - HEX2BIN, 105 => "hex2bin", - IDX, 106 => "idx", - IN_ARRAY, 107 => "in_array", - JSON_ENCODE, 108 => "json_encode", - LTRIM, 109 => "ltrim", - MB_STRLEN, 110 => "mb_strlen", - MB_STRTOLOWER, 111 => "mb_strtolower", - MB_STRTOUPPER, 112 => "mb_strtoupper", - MD5, 113 => "md5", - MKTIME, 114 => "mktime", - PASSWORD_HASH, 115 => "password_hash", - RAND, 116 => "rand", - REALPATH, 117 => "realpath", - RTRIM, 118 => "rtrim", - SHA1, 119 => "sha1", - STR_REPEAT, 120 => "str_repeat", - STRPAD, 121 => "strpad", - STRTOLOWER, 122 => "strtolower", - STRTOTIME, 123 => "strtotime", - STRTOUPPER, 124 => "strtoupper", - TRIM, 125 => "trim", - UTF8_ENCODE, 126 => "utf8_encode", - VSPRINTF, 127 => "vsprintf", +include!(concat!(env!("OUT_DIR"), "/interned_strings.rs")); + +#[derive(Serialize, Deserialize, Clone, Debug)] +pub struct Interner { + map: IndexSet>, } impl StrId { @@ -151,11 +25,6 @@ impl StrId { } } -#[derive(Serialize, Deserialize, Clone, Debug)] -pub struct Interner { - map: IndexSet>, -} - impl Interner { /// Get the id corresponding to `path`. /// diff --git a/src/str/str_macro.rs b/src/str/str_macro.rs deleted file mode 100644 index f81bb1c1..00000000 --- a/src/str/str_macro.rs +++ /dev/null @@ -1,24 +0,0 @@ -#[macro_export] -macro_rules! interned_strings { - ($($name:ident, $id:expr => $value:expr),* $(,)?) 
=> { - impl StrId { - $( - pub const $name: StrId = StrId($id); - )* - } - - impl Default for Interner { - fn default() -> Self { - let mut interner = Interner { - map: IndexSet::default(), - }; - - $( - interner.intern($value.to_string()); - )* - - interner - } - } - }; -}