Skip to content

Commit

Permalink
Soft-deprecate String constructors and assignment operators that impl…
Browse files Browse the repository at this point in the history
…icitly parse the given string to find its end. Add known-length constructors and explicit from_c_str static methods.
  • Loading branch information
Ivorforce committed Dec 2, 2024
1 parent 0eadbdb commit 10e14e0
Show file tree
Hide file tree
Showing 4 changed files with 231 additions and 209 deletions.
211 changes: 38 additions & 173 deletions core/string/ustring.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -304,95 +304,43 @@ Error String::parse_url(String &r_scheme, String &r_host, int &r_port, String &r
return OK;
}

void String::copy_from(const char *p_cstr) {
// copy Latin-1 encoded c-string directly
if (!p_cstr) {
resize(0);
return;
}

const size_t len = strlen(p_cstr);

if (len == 0) {
void String::copy_from(const StrRange<char> &p_cstr) {
if (p_cstr.len == 0) {
resize(0);
return;
}

resize(len + 1); // include 0
resize(p_cstr.len + 1); // include 0

const char *src = p_cstr.c_str;
const char *end = src + p_cstr.len;
char32_t *dst = ptrw();

for (size_t i = 0; i <= len; i++) {
#if CHAR_MIN == 0
uint8_t c = p_cstr[i];
#else
uint8_t c = p_cstr[i] >= 0 ? p_cstr[i] : uint8_t(256 + p_cstr[i]);
#endif
if (c == 0 && i < len) {
print_unicode_error("NUL character", true);
dst[i] = _replacement_char;
} else {
dst[i] = c;
}
for (; src < end; ++src, ++dst) {
// If char is int8_t, a set sign bit will be reinterpreted as 256 - val implicitly.
*dst = static_cast<uint8_t>(*src);
}
*dst = 0;
}

void String::copy_from(const char *p_cstr, const int p_clip_to) {
// copy Latin-1 encoded c-string directly
if (!p_cstr) {
resize(0);
return;
}

int len = 0;
const char *ptr = p_cstr;
while ((p_clip_to < 0 || len < p_clip_to) && *(ptr++) != 0) {
len++;
}

if (len == 0) {
resize(0);
return;
}

resize(len + 1); // include 0

char32_t *dst = ptrw();

for (int i = 0; i < len; i++) {
#if CHAR_MIN == 0
uint8_t c = p_cstr[i];
#else
uint8_t c = p_cstr[i] >= 0 ? p_cstr[i] : uint8_t(256 + p_cstr[i]);
#endif
if (c == 0) {
print_unicode_error("NUL character", true);
dst[i] = _replacement_char;
} else {
dst[i] = c;
}
}
dst[len] = 0;
}

void String::copy_from(const wchar_t *p_cstr) {
void String::copy_from(const StrRange<wchar_t> &p_cstr) {
#ifdef WINDOWS_ENABLED
// wchar_t is 16-bit, parse as UTF-16
parse_utf16((const char16_t *)p_cstr);
// TODO parse_utf16 does not currently support passing in a known length to save time.
parse_utf16((const char16_t *)p_cstr.c_str);
#else
// wchar_t is 32-bit, copy directly
copy_from((const char32_t *)p_cstr);
copy_from((StrRange<char32_t> &)p_cstr);
#endif
}

void String::copy_from(const wchar_t *p_cstr, const int p_clip_to) {
#ifdef WINDOWS_ENABLED
// wchar_t is 16-bit, parse as UTF-16
parse_utf16((const char16_t *)p_cstr, p_clip_to);
#else
// wchar_t is 32-bit, copy directly
copy_from((const char32_t *)p_cstr, p_clip_to);
#endif
void String::copy_from(const StrRange<char32_t> &p_cstr) {
if (p_cstr.len == 0) {
resize(0);
return;
}

copy_from_unchecked(p_cstr.c_str, p_cstr.len);
}

void String::copy_from(const char32_t &p_char) {
Expand All @@ -418,84 +366,42 @@ void String::copy_from(const char32_t &p_char) {
dst[1] = 0;
}

void String::copy_from(const char32_t *p_cstr) {
if (!p_cstr) {
resize(0);
return;
}

int len = 0;
const char32_t *ptr = p_cstr;
while (*(ptr++) != 0) {
len++;
}

if (len == 0) {
resize(0);
return;
}

copy_from_unchecked(p_cstr, len);
}

void String::copy_from(const char32_t *p_cstr, const int p_clip_to) {
if (!p_cstr) {
resize(0);
return;
}

int len = 0;
const char32_t *ptr = p_cstr;
while ((p_clip_to < 0 || len < p_clip_to) && *(ptr++) != 0) {
len++;
}

if (len == 0) {
resize(0);
return;
}

copy_from_unchecked(p_cstr, len);
}

// assumes the following have already been validated:
// p_char != nullptr
// p_length > 0
// p_length <= p_char strlen
void String::copy_from_unchecked(const char32_t *p_char, const int p_length) {
resize(p_length + 1);

const char32_t *end = p_char + p_length;
char32_t *dst = ptrw();
dst[p_length] = 0;

for (int i = 0; i < p_length; i++) {
if (p_char[i] == 0) {
print_unicode_error("NUL character", true);
dst[i] = _replacement_char;
for (; p_char < end; ++p_char, ++dst) {
const char32_t chr = *p_char;
if ((chr & 0xfffff800) == 0xd800) {
print_unicode_error(vformat("Unpaired surrogate (%x)", (uint32_t)chr));
*dst = _replacement_char;
continue;
}
if ((p_char[i] & 0xfffff800) == 0xd800) {
print_unicode_error(vformat("Unpaired surrogate (%x)", (uint32_t)p_char[i]));
dst[i] = _replacement_char;
if (chr > 0x10ffff) {
print_unicode_error(vformat("Invalid unicode codepoint (%x)", (uint32_t)chr));
*dst = _replacement_char;
continue;
}
if (p_char[i] > 0x10ffff) {
print_unicode_error(vformat("Invalid unicode codepoint (%x)", (uint32_t)p_char[i]));
dst[i] = _replacement_char;
continue;
}
dst[i] = p_char[i];
*dst = chr;
}
*dst = 0;
}

void String::operator=(const char *p_str) {
void String::operator=(const StrRange<char> &p_str) {
copy_from(p_str);
}

void String::operator=(const char32_t *p_str) {
void String::operator=(const StrRange<char32_t> &p_str) {
copy_from(p_str);
}

void String::operator=(const wchar_t *p_str) {
void String::operator=(const StrRange<wchar_t> &p_str) {
copy_from(p_str);
}

Expand Down Expand Up @@ -629,12 +535,7 @@ String &String::operator+=(char32_t p_char) {

bool String::operator==(const char *p_str) const {
// compare Latin-1 encoded c-string
int len = 0;
const char *aux = p_str;

while (*(aux++) != 0) {
len++;
}
int len = strlen(p_str);

if (length() != len) {
return false;
Expand Down Expand Up @@ -668,12 +569,7 @@ bool String::operator==(const wchar_t *p_str) const {
}

bool String::operator==(const char32_t *p_str) const {
int len = 0;
const char32_t *aux = p_str;

while (*(aux++) != 0) {
len++;
}
const int len = strlen(p_str);

if (length() != len) {
return false;
Expand Down Expand Up @@ -719,7 +615,7 @@ bool String::operator==(const String &p_str) const {
return true;
}

bool String::operator==(const StrRange &p_str_range) const {
bool String::operator==(const StrRange<char32_t> &p_str_range) const {
int len = p_str_range.len;

if (length() != len) {
Expand Down Expand Up @@ -2525,37 +2421,6 @@ Char16String String::utf16() const {
return utf16s;
}

String::String(const char *p_str) {
copy_from(p_str);
}

String::String(const wchar_t *p_str) {
copy_from(p_str);
}

String::String(const char32_t *p_str) {
copy_from(p_str);
}

String::String(const char *p_str, int p_clip_to_len) {
copy_from(p_str, p_clip_to_len);
}

String::String(const wchar_t *p_str, int p_clip_to_len) {
copy_from(p_str, p_clip_to_len);
}

String::String(const char32_t *p_str, int p_clip_to_len) {
copy_from(p_str, p_clip_to_len);
}

String::String(const StrRange &p_range) {
if (!p_range.c_str) {
return;
}
copy_from(p_range.c_str, p_range.len);
}

int64_t String::hex_to_int() const {
int len = length();
if (len == 0) {
Expand Down
Loading

0 comments on commit 10e14e0

Please sign in to comment.