Skip to content

Commit

Permalink
#292, #268 Add C string and raw C string literal styles SCE_RUST_CSTR…
Browse files Browse the repository at this point in the history
…ING and SCE_RUST_CSTRINGR.
  • Loading branch information
Ekopalypse authored and nyamatongwe committed Nov 30, 2024
1 parent 32b2065 commit 8767cc9
Show file tree
Hide file tree
Showing 7 changed files with 175 additions and 31 deletions.
6 changes: 6 additions & 0 deletions doc/LexillaHistory.html
Original file line number Diff line number Diff line change
Expand Up @@ -587,6 +587,7 @@ <h2>Contributors</h2>
<td>RainRat</td>
</tr><tr>
<td>Henrik S. Johansen</td>
<td>Ekopalypse</td>
</tr>
</table>
<h2>Releases</h2>
Expand Down Expand Up @@ -615,6 +616,11 @@ <h3>
<a href="https://github.com/ScintillaOrg/lexilla/issues/288">Issue #288</a>.
</li>
<li>
Rust: Add C string and raw C string literal styles SCE_RUST_CSTRING and SCE_RUST_CSTRINGR.
<a href="https://github.com/ScintillaOrg/lexilla/pull/292">Pull request #292</a>,
<a href="https://github.com/ScintillaOrg/lexilla/issues/268">Issue #268</a>.
</li>
<li>
TOML: Don't treat keys without values as errors.
<a href="https://github.com/ScintillaOrg/lexilla/pull/283">Pull request #283</a>.
</li>
Expand Down
2 changes: 2 additions & 0 deletions include/LexicalStyles.iface
Original file line number Diff line number Diff line change
Expand Up @@ -2023,6 +2023,8 @@ val SCE_RUST_LEXERROR=20
val SCE_RUST_BYTESTRING=21
val SCE_RUST_BYTESTRINGR=22
val SCE_RUST_BYTECHARACTER=23
val SCE_RUST_CSTRING=24
val SCE_RUST_CSTRINGR=25
# Lexical states for SCLEX_DMAP
lex DMAP=SCLEX_DMAP SCE_DMAP_
val SCE_DMAP_DEFAULT=0
Expand Down
2 changes: 2 additions & 0 deletions include/SciLexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -1810,6 +1810,8 @@
#define SCE_RUST_BYTESTRING 21
#define SCE_RUST_BYTESTRINGR 22
#define SCE_RUST_BYTECHARACTER 23
#define SCE_RUST_CSTRING 24
#define SCE_RUST_CSTRINGR 25
#define SCE_DMAP_DEFAULT 0
#define SCE_DMAP_COMMENT 1
#define SCE_DMAP_NUMBER 2
Expand Down
88 changes: 57 additions & 31 deletions lexers/LexRust.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,23 @@
using namespace Scintilla;
using namespace Lexilla;

static const int NUM_RUST_KEYWORD_LISTS = 7;
static const int MAX_RUST_IDENT_CHARS = 1023;
namespace {

static bool IsStreamCommentStyle(int style) {
constexpr int NUM_RUST_KEYWORD_LISTS = 7;
constexpr int MAX_RUST_IDENT_CHARS = 1023;


// Kind of string literal being lexed. Every enumerator carries the
// SCE_RUST_* style number of its literal, so a StringType can be cast
// directly to int when the scanned text is coloured.
enum class StringType : int {
	// "..." and r"..." / r#"..."#
	STRING = SCE_RUST_STRING,
	RAW_STRING = SCE_RUST_STRINGR,

	// b"..." and br"..." / br#"..."#
	BYTESTRING = SCE_RUST_BYTESTRING,
	RAW_BYTESTRING = SCE_RUST_BYTESTRINGR,

	// c"..." and cr"..." / cr#"..."#
	CSTRING = SCE_RUST_CSTRING,
	RAW_CSTRING = SCE_RUST_CSTRINGR,
};

// Returns true when `style` is one of the two block ("stream") comment
// styles: SCE_RUST_COMMENTBLOCK or SCE_RUST_COMMENTBLOCKDOC.
static bool IsStreamCommentStyle(int style) noexcept {
	switch (style) {
	case SCE_RUST_COMMENTBLOCK:
	case SCE_RUST_COMMENTBLOCKDOC:
		return true;
	default:
		return false;
	}
}
Expand Down Expand Up @@ -73,15 +86,15 @@ struct OptionsRust {
};

static const char * const rustWordLists[NUM_RUST_KEYWORD_LISTS + 1] = {
"Primary keywords and identifiers",
"Built in types",
"Other keywords",
"Keywords 4",
"Keywords 5",
"Keywords 6",
"Keywords 7",
0,
};
"Primary keywords and identifiers",
"Built in types",
"Other keywords",
"Keywords 4",
"Keywords 5",
"Keywords 6",
"Keywords 7",
0,
};

struct OptionSetRust : public OptionSet<OptionsRust> {
OptionSetRust() {
Expand Down Expand Up @@ -593,7 +606,7 @@ static void ScanComments(Accessor &styler, Sci_Position& pos, Sci_Position max)
ResumeBlockComment(styler, pos, max, UnknownComment, 1);
}

static void ResumeString(Accessor &styler, Sci_Position& pos, Sci_Position max, bool ascii_only) {
static void ResumeString(Accessor &styler, Sci_Position& pos, Sci_Position max, StringType string_type) {
int c = styler.SafeGetCharAt(pos, '\0');
bool error = false;
while (c != '"' && !error) {
Expand All @@ -610,7 +623,7 @@ static void ResumeString(Accessor &styler, Sci_Position& pos, Sci_Position max,
} else if (n == 'x') {
pos += 2;
error = !ScanNumericEscape(styler, pos, 2, true);
} else if (n == 'u' && !ascii_only) {
} else if (n == 'u' && (string_type != StringType::BYTESTRING)) {
pos += 2;
if (styler.SafeGetCharAt(pos, '\0') != '{') {
// old-style
Expand All @@ -624,15 +637,15 @@ static void ResumeString(Accessor &styler, Sci_Position& pos, Sci_Position max,
else
error = true;
}
} else if (n == 'U' && !ascii_only) {
} else if (n == 'U' && (string_type != StringType::BYTESTRING)) {
pos += 2;
error = !ScanNumericEscape(styler, pos, 8, true);
} else {
pos += 1;
error = true;
}
} else {
if (ascii_only && !IsASCII((char)c))
if (string_type == StringType::BYTESTRING && !IsASCII((char)c))
error = true;
else
pos++;
Expand All @@ -641,10 +654,11 @@ static void ResumeString(Accessor &styler, Sci_Position& pos, Sci_Position max,
}
if (!error)
pos++;
styler.ColourTo(pos - 1, ascii_only ? SCE_RUST_BYTESTRING : SCE_RUST_STRING);

styler.ColourTo(pos - 1, static_cast<int>(string_type));
}

static void ResumeRawString(Accessor &styler, Sci_Position& pos, Sci_Position max, int num_hashes, bool ascii_only) {
static void ResumeRawString(Accessor &styler, Sci_Position& pos, Sci_Position max, int num_hashes, StringType string_type) {
for (;;) {
if (pos == styler.LineEnd(styler.GetLine(pos)))
styler.SetLineState(styler.GetLine(pos), num_hashes);
Expand All @@ -664,15 +678,16 @@ static void ResumeRawString(Accessor &styler, Sci_Position& pos, Sci_Position ma
} else if (pos >= max) {
break;
} else {
if (ascii_only && !IsASCII((char)c))
if ((string_type == StringType::RAW_BYTESTRING) && !IsASCII((char)c))
break;
pos++;
}
}
styler.ColourTo(pos - 1, ascii_only ? SCE_RUST_BYTESTRINGR : SCE_RUST_STRINGR);

styler.ColourTo(pos - 1, static_cast<int>(string_type));
}

static void ScanRawString(Accessor &styler, Sci_Position& pos, Sci_Position max, bool ascii_only) {
static void ScanRawString(Accessor &styler, Sci_Position& pos, Sci_Position max, StringType string_type) {
pos++;
int num_hashes = 0;
while (styler.SafeGetCharAt(pos, '\0') == '#') {
Expand All @@ -683,7 +698,7 @@ static void ScanRawString(Accessor &styler, Sci_Position& pos, Sci_Position max,
styler.ColourTo(pos - 1, SCE_RUST_LEXERROR);
} else {
pos++;
ResumeRawString(styler, pos, max, num_hashes, ascii_only);
ResumeRawString(styler, pos, max, num_hashes, string_type);
}
}

Expand All @@ -701,15 +716,18 @@ void SCI_METHOD LexerRust::Lex(Sci_PositionU startPos, Sci_Position length, int
} else if (initStyle == SCE_RUST_COMMENTLINE || initStyle == SCE_RUST_COMMENTLINEDOC) {
ResumeLineComment(styler, pos, max, initStyle == SCE_RUST_COMMENTLINEDOC ? DocComment : NotDocComment);
} else if (initStyle == SCE_RUST_STRING) {
ResumeString(styler, pos, max, false);
ResumeString(styler, pos, max, StringType::STRING);
} else if (initStyle == SCE_RUST_BYTESTRING) {
ResumeString(styler, pos, max, true);
ResumeString(styler, pos, max, StringType::BYTESTRING);
} else if (initStyle == SCE_RUST_STRINGR) {
ResumeRawString(styler, pos, max, styler.GetLineState(styler.GetLine(pos) - 1), false);
ResumeRawString(styler, pos, max, styler.GetLineState(styler.GetLine(pos) - 1), StringType::RAW_STRING);
} else if (initStyle == SCE_RUST_BYTESTRINGR) {
ResumeRawString(styler, pos, max, styler.GetLineState(styler.GetLine(pos) - 1), true);
ResumeRawString(styler, pos, max, styler.GetLineState(styler.GetLine(pos) - 1), StringType::RAW_BYTESTRING);
} else if (initStyle == SCE_RUST_CSTRING) {
	// c"..." is an escaped (non-raw) literal: resume with the escape-aware
	// scanner, exactly like SCE_RUST_STRING and SCE_RUST_BYTESTRING. The raw
	// scanner would ignore backslash escapes (ending the string at \") and
	// read a hash count from line state that non-raw strings never record.
	ResumeString(styler, pos, max, StringType::CSTRING);
} else if (initStyle == SCE_RUST_CSTRINGR) {
	// cr"..." / cr#"..."#: the number of '#' delimiters was stored in the
	// previous line's state while scanning the raw string.
	ResumeRawString(styler, pos, max, styler.GetLineState(styler.GetLine(pos) - 1), StringType::RAW_CSTRING);
}

while (pos < max) {
int c = styler.SafeGetCharAt(pos, '\0');
int n = styler.SafeGetCharAt(pos + 1, '\0');
Expand All @@ -726,13 +744,19 @@ void SCI_METHOD LexerRust::Lex(Sci_PositionU startPos, Sci_Position length, int
pos += 2;
ScanRawIdentifier(styler, pos);
} else if (c == 'r' && (n == '#' || n == '"')) {
ScanRawString(styler, pos, max, false);
ScanRawString(styler, pos, max, StringType::RAW_STRING);
} else if (c == 'b' && n == 'r' && (n2 == '#' || n2 == '"')) {
pos++;
ScanRawString(styler, pos, max, true);
ScanRawString(styler, pos, max, StringType::RAW_BYTESTRING);
} else if (c == 'b' && n == '"') {
pos += 2;
ResumeString(styler, pos, max, true);
ResumeString(styler, pos, max, StringType::BYTESTRING);
} else if (c == 'c' && n == 'r' && (n2 == '#' || n2 == '"')) {
pos++;
ScanRawString(styler, pos, max, StringType::RAW_CSTRING);
} else if (c == 'c' && n == '"') {
pos += 2;
ResumeString(styler, pos, max, StringType::CSTRING);
} else if (c == 'b' && n == '\'') {
pos++;
ScanCharacterLiteralOrLifetime(styler, pos, true);
Expand All @@ -753,7 +777,7 @@ void SCI_METHOD LexerRust::Lex(Sci_PositionU startPos, Sci_Position length, int
ScanCharacterLiteralOrLifetime(styler, pos, false);
} else if (c == '"') {
pos++;
ResumeString(styler, pos, max, false);
ResumeString(styler, pos, max, StringType::STRING);
} else {
pos++;
styler.ColourTo(pos - 1, SCE_RUST_LEXERROR);
Expand Down Expand Up @@ -860,4 +884,6 @@ void SCI_METHOD LexerRust::Fold(Sci_PositionU startPos, Sci_Position length, int
}
}

}

extern const LexerModule lmRust(SCLEX_RUST, LexerRust::LexerFactoryRust, "rust", rustWordLists);
36 changes: 36 additions & 0 deletions test/examples/rust/Issue268.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
// coding: utf-8

b"foo"; br"foo" // foo
b"\"foo\""; br#""foo""#; // "foo"

b"foo #\"# bar";
br##"foo #"# bar"##; // foo #"# bar

b"\x52"; b"R"; br"R" // R
b"\\x52"; br"\x52" // \x52

c"æ" // LATIN SMALL LETTER AE (U+00E6)
c"\u{00E6}";
c"\xC3\xA6";

c"foo"; cr"foo" // foo
c"\"foo\""; cr#""foo""#; // "foo"

c"foo #\"# bar";
cr##"foo #"# bar"##; // foo #"# bar

c"\x52"; c"R"; cr"R" // R
c"\\x52"; cr"\x52" // \x52

"foo"; r"foo" // foo
"\"foo\""; r#""foo""#; // "foo"

"foo #\"# bar";
r##"foo #"# bar"##; // foo #"# bar

"\x52"; "R"; r"R" // R
"\\x52"; r"\x52" // \x52

"æ" // LATIN SMALL LETTER AE (U+00E6)
"\u{00E6}";
"\xC3\xA6";
36 changes: 36 additions & 0 deletions test/examples/rust/Issue268.rs.folded
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
0 400 400 // coding: utf-8
1 400 400
0 400 400 b"foo"; br"foo" // foo
0 400 400 b"\"foo\""; br#""foo""#; // "foo"
1 400 400
0 400 400 b"foo #\"# bar";
0 400 400 br##"foo #"# bar"##; // foo #"# bar
1 400 400
0 400 400 b"\x52"; b"R"; br"R" // R
0 400 400 b"\\x52"; br"\x52" // \x52
1 400 400
0 400 400 c"æ" // LATIN SMALL LETTER AE (U+00E6)
0 400 400 c"\u{00E6}";
0 400 400 c"\xC3\xA6";
1 400 400
0 400 400 c"foo"; cr"foo" // foo
0 400 400 c"\"foo\""; cr#""foo""#; // "foo"
1 400 400
0 400 400 c"foo #\"# bar";
0 400 400 cr##"foo #"# bar"##; // foo #"# bar
1 400 400
0 400 400 c"\x52"; c"R"; cr"R" // R
0 400 400 c"\\x52"; cr"\x52" // \x52
1 400 400
0 400 400 "foo"; r"foo" // foo
0 400 400 "\"foo\""; r#""foo""#; // "foo"
1 400 400
0 400 400 "foo #\"# bar";
0 400 400 r##"foo #"# bar"##; // foo #"# bar
1 400 400
0 400 400 "\x52"; "R"; r"R" // R
0 400 400 "\\x52"; r"\x52" // \x52
1 400 400
0 400 400 "æ" // LATIN SMALL LETTER AE (U+00E6)
0 400 400 "\u{00E6}";
0 400 400 "\xC3\xA6";
36 changes: 36 additions & 0 deletions test/examples/rust/Issue268.rs.styled
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
{2}// coding: utf-8{0}

{21}b"foo"{16};{0} {22}br"foo"{0} {2}// foo{0}
{21}b"\"foo\""{16};{0} {22}br#""foo""#{16};{0} {2}// "foo"{0}

{21}b"foo #\"# bar"{16};{0}
{22}br##"foo #"# bar"##{16};{0} {2}// foo #"# bar{0}

{21}b"\x52"{16};{0} {21}b"R"{16};{0} {22}br"R"{0} {2}// R{0}
{21}b"\\x52"{16};{0} {22}br"\x52"{0} {2}// \x52{0}

{24}c"æ"{0} {2}// LATIN SMALL LETTER AE (U+00E6){0}
{24}c"\u{00E6}"{16};{0}
{24}c"\xC3\xA6"{16};{0}

{24}c"foo"{16};{0} {25}cr"foo"{0} {2}// foo{0}
{24}c"\"foo\""{16};{0} {25}cr#""foo""#{16};{0} {2}// "foo"{0}

{24}c"foo #\"# bar"{16};{0}
{25}cr##"foo #"# bar"##{16};{0} {2}// foo #"# bar{0}

{24}c"\x52"{16};{0} {24}c"R"{16};{0} {25}cr"R"{0} {2}// R{0}
{24}c"\\x52"{16};{0} {25}cr"\x52"{0} {2}// \x52{0}

{13}"foo"{16};{0} {14}r"foo"{0} {2}// foo{0}
{13}"\"foo\""{16};{0} {14}r#""foo""#{16};{0} {2}// "foo"{0}

{13}"foo #\"# bar"{16};{0}
{14}r##"foo #"# bar"##{16};{0} {2}// foo #"# bar{0}

{13}"\x52"{16};{0} {13}"R"{16};{0} {14}r"R"{0} {2}// R{0}
{13}"\\x52"{16};{0} {14}r"\x52"{0} {2}// \x52{0}

{13}"æ"{0} {2}// LATIN SMALL LETTER AE (U+00E6){0}
{13}"\u{00E6}"{16};{0}
{13}"\xC3\xA6"{16};

0 comments on commit 8767cc9

Please sign in to comment.