Skip to content

Commit

Permalink
Refactor comment directive handling
Browse files Browse the repository at this point in the history
Merge pragma protect and translate_off handling, prepare for other kinds of directives.
  • Loading branch information
MikePopoloski committed Dec 31, 2024
1 parent 476bf53 commit f4a4af6
Show file tree
Hide file tree
Showing 6 changed files with 135 additions and 145 deletions.
50 changes: 44 additions & 6 deletions include/slang/parsing/Lexer.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "slang/parsing/LexerFacts.h"
#include "slang/parsing/Token.h"
#include "slang/text/SourceLocation.h"
#include "slang/util/Hash.h"
#include "slang/util/LanguageVersion.h"
#include "slang/util/SmallVector.h"
#include "slang/util/Util.h"
Expand All @@ -23,8 +24,48 @@ class BumpAllocator;

namespace slang::parsing {

/// A handler for a specific kind of directive embedded in comments in the
/// user source text.
struct CommentHandler {
    /// The kind of directive this handler is for.
    enum Kind {
        /// A region that should be skipped (as if it were a pragma protect region).
        Protect,

        /// A region that should be skipped (as if it were commented out).
        TranslateOff,

        /// Turns linting on for one or more warnings.
        LintOn,

        /// Turns linting off for one or more warnings.
        LintOff,

        /// Saves the current lint state in a stack.
        LintSave,

        /// Restore a previously set lint state.
        LintRestore
    };

    /// The kind of comment handler this is. It has a default member initializer
    /// so that a default-constructed handler never carries an indeterminate value.
    Kind kind = Protect;

    /// For region handlers, the text that marks the end of the region.
    std::string_view endRegion;

    /// Constructs a handler of kind Protect with an empty end-region marker.
    CommentHandler() = default;

    /// Constructs a handler of the given kind, optionally with the text that
    /// marks the end of its region. Left implicit so that a handler can be
    /// written as a braced list such as `{CommentHandler::Protect}`.
    CommentHandler(Kind kind, std::string_view endRegion = {}) : kind(kind), endRegion(endRegion) {}
};

/// A two-level lookup table for comment directives: the outer key is the first
/// word of the comment, the inner key is the second word, and the mapped value
/// is the handler to apply (e.g. commentHandlers["pragma"]["protect"]).
using CommentHandlerMap =
flat_hash_map<std::string_view, flat_hash_map<std::string_view, CommentHandler>>;

/// Contains various options that can control lexing behavior.
struct SLANG_EXPORT LexerOptions {
/// A map of comment handlers to use when lexing directives inside comments.
CommentHandlerMap commentHandlers;

/// The maximum number of errors that can occur before the rest of the source
/// buffer is skipped.
uint32_t maxErrors = 16;
Expand All @@ -35,10 +76,6 @@ struct SLANG_EXPORT LexerOptions {
/// If true, the preprocessor will support legacy protected envelope directives,
/// for compatibility with old Verilog tools.
bool enableLegacyProtect = false;

/// A flag to enable the interpretation of non-standard line comment pragmas
/// disabling parts of the input for synthesis.
bool enableTranslateOnOffCompat = false;
};

/// Possible encodings for encrypted text used in a pragma protect region.
Expand Down Expand Up @@ -117,8 +154,9 @@ class SLANG_EXPORT Lexer {
bool scanUTF8Char(bool alreadyErrored, uint32_t* code, int& computedLen);
void scanEncodedText(ProtectEncoding encoding, uint32_t expectedBytes, bool singleLine,
bool legacyProtectedMode);
void scanProtectComment();
void scanTranslateOffSection();
bool tryApplyCommentHandler();
void scanDisabledRegion(std::string_view firstWord, std::string_view secondWord,
std::optional<std::string_view> thirdWord, DiagCode unclosedDiag);

template<typename... Args>
Token create(TokenKind kind, Args&&... args);
Expand Down
5 changes: 5 additions & 0 deletions include/slang/text/CharInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,11 @@ constexpr bool isWhitespace(char c) {
return false;
}

/// Checks whether the given character is a space or a tab; every other
/// character yields false.
constexpr bool isTabOrSpace(char c) {
    switch (c) {
        case ' ':
        case '\t':
            return true;
        default:
            return false;
    }
}

/// Returns whether the given character is considered a new line.
constexpr bool isNewline(char c) {
return c == '\r' || c == '\n';
Expand Down
3 changes: 3 additions & 0 deletions source/driver/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -735,6 +735,9 @@ void Driver::addParseOptions(Bag& bag) const {
if (options.maxLexerErrors.has_value())
loptions.maxErrors = *options.maxLexerErrors;

if (loptions.enableLegacyProtect)
loptions.commentHandlers["pragma"]["protect"] = {CommentHandler::Protect};

ParserOptions poptions;
poptions.languageVersion = languageVersion;
if (options.maxParseDepth.has_value())
Expand Down
203 changes: 67 additions & 136 deletions source/parsing/Lexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,19 +24,6 @@ static_assert(std::numeric_limits<double>::is_iec559, "SystemVerilog requires IE

static const double BitsPerDecimal = log2(10.0);

// Comment text (after the leading "//" and any spaces) that opens a legacy
// protected region; Lexer::scanLineComment matches it character by character.
static constexpr std::string_view PragmaBeginProtected = "pragma protect begin_protected"sv;
// Comment text that closes a legacy protected region.
static constexpr std::string_view PragmaEndProtected = "pragma protect end_protected"sv;

// Tables of the comment pragmas that start (Off) and end (On) a translate-off region.
// detectTranslateOnOffPragma narrows a [lower, upper) candidate range one character at a
// time, so each table must be kept in alphabetical order. It also inspects only the lowest
// remaining candidate to decide whether the next pattern character is a space, so whenever
// a prefix is followed by a space in one entry, the same prefix must be followed by a space
// in every entry that shares it.
static std::vector<std::string_view> TranslateOffPragmas = {
"pragma synthesis_off"sv, "pragma translate_off"sv, "synopsys synthesis_off"sv,
"synopsys translate_off"sv, "synthesis translate_off"sv, "xilinx translate_off"sv};
static std::vector<std::string_view> TranslateOnPragmas = {
"pragma synthesis_on"sv, "pragma translate_on"sv, "synopsys synthesis_on"sv,
"synopsys translate_on"sv, "synthesis translate_on"sv, "xilinx translate_on"sv};

namespace slang::parsing {

using namespace syntax;
Expand Down Expand Up @@ -1207,108 +1194,10 @@ void Lexer::scanWhitespace() {
addTrivia(TriviaKind::Whitespace);
}

// Determines whether the given comment text is one of the known translate on/off pragmas.
//
// view:    the text of a line comment; matching starts after its first two characters
//          (the "//" introducer in the callers visible here).
// offMode: when true the text is matched against TranslateOffPragmas, otherwise
//          against TranslateOnPragmas.
//
// Returns true only if an entire table entry is matched and is followed either by the
// end of the view or by a whitespace character. Returns false otherwise, including when
// the view is shorter than two characters or ends before an entry is fully matched.
bool detectTranslateOnOffPragma(std::string_view view, bool offMode) {
if (view.length() < 2)
return false;
// p is the read cursor into the comment text, end is one past its last character.
const char *p = view.data() + 2, *end = view.data() + view.size();

// Advances p over any run of whitespace and reports whether at least one
// whitespace character was consumed.
auto skipWs = [&] {
bool seen = false;
while (p != end && isWhitespace(*p)) {
seen = true;
p++;
}
return seen;
};

// cpos is the index of the pattern character currently being matched;
// [clower, cupper) is the range of table entries still consistent with the
// characters seen so far.
size_t cpos = 0;
auto clower = offMode ? TranslateOffPragmas.begin() : TranslateOnPragmas.begin();
auto cupper = offMode ? TranslateOffPragmas.end() : TranslateOnPragmas.end();

// Whitespace between the comment introducer and the first word is optional.
skipWs();
while (p != end) {
if ((*clower)[cpos] == ' ') {
// A single space in the pattern stands for one or more whitespace
// characters in the input; none at all is a mismatch.
if (!skipWs())
return false;

cpos++;
}
else {
// Drop candidates whose character at cpos sorts below or above the input
// character. This relies on the tables being in alphabetical order.
while (clower < cupper && (*clower)[cpos] < *p)
clower++;
while (cupper > clower && (*(cupper - 1))[cpos] > *p)
cupper--;

// No candidate has the input character at this position.
if (clower == cupper)
return false;

cpos++;
p++;
}

if (cpos == clower->length()) {
// We have a complete match, check the comment line
// ends there or the match is followed by a whitespace
if (p == end || isWhitespace(*p))
return true;
return false;
}
}

// The input ran out before any candidate was matched in full.
return false;
}

/// Consumes source text until a line comment recognized by
/// detectTranslateOnOffPragma as a translate-on pragma has been read in full.
/// If the real end of the buffer is reached first, an UnclosedTranslateOff
/// diagnostic is added and scanning stops there.
void Lexer::scanTranslateOffSection() {
    for (;;) {
        // Remember where this character sits in case it opens a line comment.
        const char* const candidate = sourceBuffer;
        const char current = peek();

        if (current == '\0' && reallyAtEnd()) {
            addDiag(diag::UnclosedTranslateOff, currentOffset() - lexemeLength());
            return;
        }

        advance();
        if (current != '/' || peek() != '/')
            continue;

        // We are on the second slash of "//": take it and the rest of the line.
        advance();
        while (!isNewline(peek()) && !reallyAtEnd())
            advance();

        const std::string_view lineComment(candidate,
                                           static_cast<size_t>(sourceBuffer - candidate));
        if (detectTranslateOnOffPragma(lineComment, false))
            return;
    }
}

void Lexer::scanLineComment() {
if (options.enableLegacyProtect) {
// See if we're looking at a pragma protect comment and skip
// over it if so.
while (peek() == ' ')
advance();

bool found = true;
for (char c : PragmaBeginProtected) {
if (!consume(c)) {
found = false;
break;
}
}

if (found) {
scanProtectComment();
addTrivia(TriviaKind::DisabledText);
return;
}
if (tryApplyCommentHandler()) {
addTrivia(TriviaKind::DisabledText);
return;
}

bool sawUTF8Error = false;
Expand All @@ -1334,14 +1223,6 @@ void Lexer::scanLineComment() {
}
}

if (options.enableTranslateOnOffCompat) {
if (detectTranslateOnOffPragma(lexeme(), true)) {
scanTranslateOffSection();
addTrivia(TriviaKind::DisabledText);
return;
}
}

addTrivia(TriviaKind::LineComment);
}

Expand Down Expand Up @@ -1383,6 +1264,52 @@ void Lexer::scanBlockComment() {
addTrivia(TriviaKind::BlockComment);
}

/// Tries to interpret the text at the current position as a comment directive
/// registered in options.commentHandlers.
///
/// The first word read selects an entry of the outer map and the second word
/// selects a handler within it. Characters consumed while reading these words
/// are not rewound when no handler applies.
///
/// @return true if a handler scanned a disabled region; false if no handler
///         is registered for the words found, if a Protect handler was not
///         followed by `begin_protected`, or if the handler's kind has no
///         region-skipping behavior implemented here.
bool Lexer::tryApplyCommentHandler() {
    // Skips leading spaces and tabs, then consumes a run of alphanumeric or
    // underscore characters and returns it (empty if there is no such run).
    auto nextWord = [&]() {
        while (isTabOrSpace(peek()))
            advance();

        const char* start = sourceBuffer;
        while (true) {
            char c = peek();
            if (!isAlphaNumeric(c) && c != '_')
                break;

            advance();
        }

        // The cursor only moves forward, so the difference is non-negative;
        // make the signed-to-unsigned conversion explicit.
        return std::string_view(start, static_cast<size_t>(sourceBuffer - start));
    };

    auto firstWord = nextWord();
    auto it = options.commentHandlers.find(firstWord);
    if (it == options.commentHandlers.end())
        return false;

    auto it2 = it->second.find(nextWord());
    if (it2 == it->second.end())
        return false;

    auto& handler = it2->second;
    switch (handler.kind) {
        case CommentHandler::Protect:
            // We need to see begin_protected, otherwise we ignore.
            if (nextWord() == "begin_protected"sv) {
                addDiag(diag::ProtectedEnvelope, currentOffset() - lexemeLength());
                scanDisabledRegion(firstWord, "protect", "end_protected", diag::RawProtectEOF);
                return true;
            }
            return false;
        case CommentHandler::TranslateOff:
            scanDisabledRegion(firstWord, handler.endRegion, std::nullopt,
                               diag::UnclosedTranslateOff);
            return true;
        default:
            // The remaining kinds (LintOn, LintOff, LintSave, LintRestore) can be
            // registered through the public options map but have no handling in
            // this function. Report "not handled" rather than hitting an
            // unreachable marker on user-controlled configuration.
            return false;
    }
}

bool Lexer::scanUTF8Char(bool alreadyErrored) {
uint32_t unused1;
int unused2;
Expand Down Expand Up @@ -1590,32 +1517,36 @@ void Lexer::scanEncodedText(ProtectEncoding encoding, uint32_t expectedBytes, bo
}
}

void Lexer::scanProtectComment() {
addDiag(diag::ProtectedEnvelope, currentOffset() - PragmaBeginProtected.size());
void Lexer::scanDisabledRegion(std::string_view firstWord, std::string_view secondWord,
std::optional<std::string_view> thirdWord, DiagCode unclosedDiag) {
auto matchWord = [&](std::string_view word) {
while (isTabOrSpace(peek()))
advance();

for (char c : word) {
if (!consume(c))
return false;
}

char c = peek();
return isWhitespace(c) || c == '\0';
};

while (true) {
char c = peek();
if (c == '\0' && reallyAtEnd()) {
addDiag(diag::RawProtectEOF, currentOffset() - 1);
addDiag(unclosedDiag, currentOffset() - lexemeLength());
return;
}

advance();
if (c == '/' && peek() == '/') {
advance();
while (peek() == ' ')
advance();

bool found = true;
for (char d : PragmaEndProtected) {
if (!consume(d)) {
found = false;
break;
}
if (matchWord(firstWord) && matchWord(secondWord)) {
if (!thirdWord || matchWord(*thirdWord))
return;
}

if (found)
return;
}
}
}
Expand Down
Loading

0 comments on commit f4a4af6

Please sign in to comment.