Refactor comment directive handling

Merge pragma protect and translate_off handling, prepare for other kinds of directives.
MikePopoloski · Dec 31, 2024 · f4a4af6 · f4a4af6
1 parent 476bf53
commit f4a4af6
Show file tree

Hide file tree

Showing 6 changed files with 135 additions and 145 deletions.
diff --git a/include/slang/parsing/Lexer.h b/include/slang/parsing/Lexer.h
@@ -11,6 +11,7 @@
 #include "slang/parsing/LexerFacts.h"
 #include "slang/parsing/Token.h"
 #include "slang/text/SourceLocation.h"
+#include "slang/util/Hash.h"
 #include "slang/util/LanguageVersion.h"
 #include "slang/util/SmallVector.h"
 #include "slang/util/Util.h"
@@ -23,8 +24,48 @@ class BumpAllocator;
 
 namespace slang::parsing {
 
+/// A handler for a specific kind of directive embedded in comments in the
+/// user source text.
+struct CommentHandler {
+    /// The kind of directive this handler is for.
+    enum Kind {
+        /// A region that should be skipped (as if it were a pragma protect region).
+        Protect,
+
+        /// A region that should be skipped (as if it were commented out).
+        TranslateOff,
+
+        /// Turns linting on for one or more warnings.
+        LintOn,
+
+        /// Turns linting off for one or more warnings.
+        LintOff,
+
+        /// Saves the current lint state in a stack.
+        LintSave,
+
+        /// Restores a previously saved lint state.
+        LintRestore
+    };
+
+    /// The kind of comment handler this is. Initialized to the first enumerator
+    /// so that a default-constructed handler never holds an indeterminate value.
+    Kind kind = Protect;
+
+    /// For region handlers, the word that marks the end of the region. It is
+    /// matched as the second word of a later line comment, following the same
+    /// first word that opened the region. This is a non-owning view.
+    std::string_view endRegion;
+
+    /// Constructs a handler of kind Protect with an empty end-of-region word.
+    CommentHandler() = default;
+
+    /// Constructs a handler of the given @a kind, optionally with the word
+    /// that closes the region it opens.
+    CommentHandler(Kind kind, std::string_view endRegion = {}) : kind(kind), endRegion(endRegion) {}
+};
+
+/// A two-level lookup table for comment directives: the outer key is the first
+/// word found in the comment, the inner key is the second word, and the mapped
+/// value is the handler to apply when both match.
+using CommentHandlerMap =
+    flat_hash_map<std::string_view, flat_hash_map<std::string_view, CommentHandler>>;
+
 /// Contains various options that can control lexing behavior.
 struct SLANG_EXPORT LexerOptions {
+    /// A map of comment handlers to use when lexing directives inside comments.
+    CommentHandlerMap commentHandlers;
+
     /// The maximum number of errors that can occur before the rest of the source
     /// buffer is skipped.
     uint32_t maxErrors = 16;
@@ -35,10 +76,6 @@ struct SLANG_EXPORT LexerOptions {
     /// If true, the preprocessor will support legacy protected envelope directives,
     /// for compatibility with old Verilog tools.
     bool enableLegacyProtect = false;
-
-    /// A flag to enable the interpretation of non-standard line comment pragmas
-    /// disabling parts of the input for synthesis.
-    bool enableTranslateOnOffCompat = false;
 };
 
 /// Possible encodings for encrypted text used in a pragma protect region.
@@ -117,8 +154,9 @@ class SLANG_EXPORT Lexer {
     bool scanUTF8Char(bool alreadyErrored, uint32_t* code, int& computedLen);
     void scanEncodedText(ProtectEncoding encoding, uint32_t expectedBytes, bool singleLine,
                          bool legacyProtectedMode);
-    void scanProtectComment();
-    void scanTranslateOffSection();
+    bool tryApplyCommentHandler();
+    void scanDisabledRegion(std::string_view firstWord, std::string_view secondWord,
+                            std::optional<std::string_view> thirdWord, DiagCode unclosedDiag);
 
     template<typename... Args>
     Token create(TokenKind kind, Args&&... args);

diff --git a/include/slang/text/CharInfo.h b/include/slang/text/CharInfo.h
@@ -57,6 +57,11 @@ constexpr bool isWhitespace(char c) {
     return false;
 }
 
+/// Returns true when the given character is a horizontal blank, i.e. either
+/// a space or a tab; every other character yields false.
+constexpr bool isTabOrSpace(char c) {
+    switch (c) {
+        case ' ':
+        case '\t':
+            return true;
+        default:
+            return false;
+    }
+}
+
 /// Returns whether the given character is considered a new line.
 constexpr bool isNewline(char c) {
     return c == '\r' || c == '\n';

diff --git a/source/driver/Driver.cpp b/source/driver/Driver.cpp
@@ -735,6 +735,9 @@ void Driver::addParseOptions(Bag& bag) const {
     if (options.maxLexerErrors.has_value())
         loptions.maxErrors = *options.maxLexerErrors;
 
+    if (loptions.enableLegacyProtect)
+        loptions.commentHandlers["pragma"]["protect"] = {CommentHandler::Protect};
+
     ParserOptions poptions;
     poptions.languageVersion = languageVersion;
     if (options.maxParseDepth.has_value())

diff --git a/source/parsing/Lexer.cpp b/source/parsing/Lexer.cpp
@@ -24,19 +24,6 @@ static_assert(std::numeric_limits<double>::is_iec559, "SystemVerilog requires IE
 
 static const double BitsPerDecimal = log2(10.0);
 
-static constexpr std::string_view PragmaBeginProtected = "pragma protect begin_protected"sv;
-static constexpr std::string_view PragmaEndProtected = "pragma protect end_protected"sv;
-
-// Note the detection algorithm requires these in alphabetical order; also when a prefix is
-// followed by a whitespace in one variant, it's assumed the same prefix will be followed by
-// a whitespace in all variants
-static std::vector<std::string_view> TranslateOffPragmas = {
-    "pragma synthesis_off"sv,   "pragma translate_off"sv,    "synopsys synthesis_off"sv,
-    "synopsys translate_off"sv, "synthesis translate_off"sv, "xilinx translate_off"sv};
-static std::vector<std::string_view> TranslateOnPragmas = {
-    "pragma synthesis_on"sv,   "pragma translate_on"sv,    "synopsys synthesis_on"sv,
-    "synopsys translate_on"sv, "synthesis translate_on"sv, "xilinx translate_on"sv};
-
 namespace slang::parsing {
 
 using namespace syntax;
@@ -1207,108 +1194,10 @@ void Lexer::scanWhitespace() {
     addTrivia(TriviaKind::Whitespace);
 }
 
-bool detectTranslateOnOffPragma(std::string_view view, bool offMode) {
-    if (view.length() < 2)
-        return false;
-    const char *p = view.data() + 2, *end = view.data() + view.size();
-
-    auto skipWs = [&] {
-        bool seen = false;
-        while (p != end && isWhitespace(*p)) {
-            seen = true;
-            p++;
-        }
-        return seen;
-    };
-
-    size_t cpos = 0;
-    auto clower = offMode ? TranslateOffPragmas.begin() : TranslateOnPragmas.begin();
-    auto cupper = offMode ? TranslateOffPragmas.end() : TranslateOnPragmas.end();
-
-    skipWs();
-    while (p != end) {
-        if ((*clower)[cpos] == ' ') {
-            if (!skipWs())
-                return false;
-
-            cpos++;
-        }
-        else {
-            while (clower < cupper && (*clower)[cpos] < *p)
-                clower++;
-            while (cupper > clower && (*(cupper - 1))[cpos] > *p)
-                cupper--;
-
-            if (clower == cupper)
-                return false;
-
-            cpos++;
-            p++;
-        }
-
-        if (cpos == clower->length()) {
-            // We have a complete match, check the comment line
-            // ends there or the match is followed by a whitespace
-            if (p == end || isWhitespace(*p))
-                return true;
-            return false;
-        }
-    }
-
-    return false;
-}
-
-void Lexer::scanTranslateOffSection() {
-    while (true) {
-        const char* commentStart = sourceBuffer;
-
-        switch (peek()) {
-            case '\0':
-                if (reallyAtEnd()) {
-                    addDiag(diag::UnclosedTranslateOff, currentOffset() - lexemeLength());
-                    return;
-                }
-                break;
-            case '/':
-                advance();
-                if (peek() == '/') {
-                    advance();
-                    while (!isNewline(peek()) && !reallyAtEnd())
-                        advance();
-
-                    std::string_view commentText =
-                        std::string_view(commentStart, (size_t)(sourceBuffer - commentStart));
-                    if (detectTranslateOnOffPragma(commentText, false))
-                        return;
-                }
-                continue;
-            default:
-                break;
-        }
-        advance();
-    }
-}
-
 void Lexer::scanLineComment() {
-    if (options.enableLegacyProtect) {
-        // See if we're looking at a pragma protect comment and skip
-        // over it if so.
-        while (peek() == ' ')
-            advance();
-
-        bool found = true;
-        for (char c : PragmaBeginProtected) {
-            if (!consume(c)) {
-                found = false;
-                break;
-            }
-        }
-
-        if (found) {
-            scanProtectComment();
-            addTrivia(TriviaKind::DisabledText);
-            return;
-        }
+    if (tryApplyCommentHandler()) {
+        addTrivia(TriviaKind::DisabledText);
+        return;
     }
 
     bool sawUTF8Error = false;
@@ -1334,14 +1223,6 @@ void Lexer::scanLineComment() {
         }
     }
 
-    if (options.enableTranslateOnOffCompat) {
-        if (detectTranslateOnOffPragma(lexeme(), true)) {
-            scanTranslateOffSection();
-            addTrivia(TriviaKind::DisabledText);
-            return;
-        }
-    }
-
     addTrivia(TriviaKind::LineComment);
 }
 
@@ -1383,6 +1264,52 @@ void Lexer::scanBlockComment() {
     addTrivia(TriviaKind::BlockComment);
 }
 
+/// Tries to interpret the text at the current position of a line comment as a
+/// registered comment directive. The first two words of the comment are looked
+/// up in options.commentHandlers; if a handler is found it is applied.
+///
+/// @return true if a disabled region was scanned (the caller records it as
+///         disabled text); false if the comment should be lexed as an ordinary
+///         comment. Words inspected along the way remain consumed either way.
+bool Lexer::tryApplyCommentHandler() {
+    // Consumes leading spaces and tabs, then a run of alphanumeric or
+    // underscore characters, and returns a view of that run (possibly empty).
+    auto nextWord = [&]() {
+        while (isTabOrSpace(peek()))
+            advance();
+
+        auto start = sourceBuffer;
+        while (true) {
+            char c = peek();
+            if (!isAlphaNumeric(c) && c != '_')
+                break;
+
+            advance();
+        }
+
+        // sourceBuffer only moves forward from start, so the difference is
+        // non-negative; make the conversion to an unsigned length explicit.
+        return std::string_view(start, static_cast<size_t>(sourceBuffer - start));
+    };
+
+    auto firstWord = nextWord();
+    auto it = options.commentHandlers.find(firstWord);
+    if (it == options.commentHandlers.end())
+        return false;
+
+    auto it2 = it->second.find(nextWord());
+    if (it2 == it->second.end())
+        return false;
+
+    const auto& handler = it2->second;
+    switch (handler.kind) {
+        case CommentHandler::Protect:
+            // We need to see begin_protected, otherwise we ignore.
+            if (nextWord() == "begin_protected"sv) {
+                addDiag(diag::ProtectedEnvelope, currentOffset() - lexemeLength());
+                scanDisabledRegion(firstWord, "protect", "end_protected", diag::RawProtectEOF);
+                return true;
+            }
+            return false;
+        case CommentHandler::TranslateOff:
+            // The region ends at a comment repeating the first word followed
+            // by the handler's configured end-of-region word.
+            scanDisabledRegion(firstWord, handler.endRegion, std::nullopt,
+                               diag::UnclosedTranslateOff);
+            return true;
+        case CommentHandler::LintOn:
+        case CommentHandler::LintOff:
+        case CommentHandler::LintSave:
+        case CommentHandler::LintRestore:
+            // These kinds can be registered through the public options but are
+            // not acted on here; treat the text as an ordinary comment rather
+            // than asserting on a caller-supplied value.
+            return false;
+    }
+
+    // Only reachable if handler.kind holds a value outside the enumeration.
+    SLANG_UNREACHABLE;
+}
+
 bool Lexer::scanUTF8Char(bool alreadyErrored) {
     uint32_t unused1;
     int unused2;
@@ -1590,32 +1517,36 @@ void Lexer::scanEncodedText(ProtectEncoding encoding, uint32_t expectedBytes, bo
     }
 }
 
-void Lexer::scanProtectComment() {
-    addDiag(diag::ProtectedEnvelope, currentOffset() - PragmaBeginProtected.size());
+void Lexer::scanDisabledRegion(std::string_view firstWord, std::string_view secondWord,
+                               std::optional<std::string_view> thirdWord, DiagCode unclosedDiag) {
+    auto matchWord = [&](std::string_view word) {
+        while (isTabOrSpace(peek()))
+            advance();
+
+        for (char c : word) {
+            if (!consume(c))
+                return false;
+        }
+
+        char c = peek();
+        return isWhitespace(c) || c == '\0';
+    };
 
     while (true) {
         char c = peek();
         if (c == '\0' && reallyAtEnd()) {
-            addDiag(diag::RawProtectEOF, currentOffset() - 1);
+            addDiag(unclosedDiag, currentOffset() - lexemeLength());
             return;
         }
 
         advance();
         if (c == '/' && peek() == '/') {
             advance();
-            while (peek() == ' ')
-                advance();
 
-            bool found = true;
-            for (char d : PragmaEndProtected) {
-                if (!consume(d)) {
-                    found = false;
-                    break;
-                }
+            if (matchWord(firstWord) && matchWord(secondWord)) {
+                if (!thirdWord || matchWord(*thirdWord))
+                    return;
             }
-
-            if (found)
-                return;
         }
     }
 }