forked from llvm/llvm-project
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[libc++] Rewrite the transitive header checking machinery (llvm#110554)
Since we don't generate a full dependency graph of headers, we can greatly simplify the script that parses the result of --trace-includes. At the same time, we also unify the mechanism for detecting whether a header is a public/C compat/internal/etc header with the existing mechanism in header_information.py. As a drive-by this fixes the headers_in_modulemap.sh.py test which had been disabled by mistake because it used its own way of determining the list of libc++ headers. By consistently using header_information.py to get that information, problems like this shouldn't happen anymore. This should also unblock llvm#110303, which was blocked because of a brittle implementation of the transitive includes check which broke when the repository was cloned at a path like /path/__something/more.
- Loading branch information
Showing
8 changed files
with
297 additions
and
298 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
#!/usr/bin/env python | ||
# ===----------------------------------------------------------------------===## | ||
# | ||
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
# See https://llvm.org/LICENSE.txt for license information. | ||
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
# | ||
# ===----------------------------------------------------------------------===## | ||
|
||
from typing import List, Tuple, Optional | ||
import argparse | ||
import io | ||
import itertools | ||
import os | ||
import pathlib | ||
import re | ||
import sys | ||
|
||
libcxx_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))) | ||
sys.path.append(os.path.join(libcxx_root, "utils")) | ||
from libcxx.header_information import Header | ||
|
||
def parse_line(line: str) -> Tuple[int, str]: | ||
""" | ||
Parse a single line of --trace-includes output. | ||
Returns the inclusion level and the raw file name being included. | ||
""" | ||
match = re.match(r"(\.+) (.+)", line) | ||
if not match: | ||
raise ArgumentError(f"Line {line} contains invalid data.") | ||
|
||
# The number of periods in front of the header name is the nesting level of | ||
# that header. | ||
return (len(match.group(1)), match.group(2)) | ||
|
||
def make_cxx_v1_relative(header: str) -> Optional[str]: | ||
""" | ||
Returns the path of the header as relative to <whatever>/c++/v1, or None if the path | ||
doesn't contain c++/v1. | ||
We use that heuristic to figure out which headers are libc++ headers. | ||
""" | ||
# On Windows, the path separators can either be forward slash or backslash. | ||
# If it is a backslash, Clang prints it escaped as two consecutive | ||
# backslashes, and they need to be escaped in the RE. (Use a raw string for | ||
# the pattern to avoid needing another level of escaping on the Python string | ||
# literal level.) | ||
pathsep = r"(?:/|\\\\)" | ||
CXX_V1_REGEX = r"^.*c\+\+" + pathsep + r"v[0-9]+" + pathsep + r"(.+)$" | ||
match = re.match(CXX_V1_REGEX, header) | ||
if not match: | ||
return None | ||
else: | ||
return match.group(1) | ||
|
||
def parse_file(file: io.TextIOBase) -> List[Tuple[Header, Header]]: | ||
""" | ||
Parse a file containing --trace-includes output to generate a list of the | ||
transitive includes contained in it. | ||
""" | ||
result = [] | ||
includer = None | ||
for line in file.readlines(): | ||
(level, header) = parse_line(line) | ||
relative = make_cxx_v1_relative(header) | ||
|
||
# Not a libc++ header | ||
if relative is None: | ||
continue | ||
|
||
# If we're at the first level, remember this header as being the one who includes other headers. | ||
# There's usually exactly one, except if the compiler is passed a file with `-include`. | ||
if level == 1: | ||
includer = Header(relative) | ||
continue | ||
|
||
# Otherwise, take note that this header is being included by the top-level includer. | ||
else: | ||
assert includer is not None | ||
result.append((includer, Header(relative))) | ||
return result | ||
|
||
def print_csv(includes: List[Tuple[Header, Header]]) -> None: | ||
""" | ||
Print the transitive includes as space-delimited CSV. | ||
This function only prints public libc++ headers that are not C compatibility headers. | ||
""" | ||
# Sort and group by includer | ||
by_includer = lambda t: t[0] | ||
includes = itertools.groupby(sorted(includes, key=by_includer), key=by_includer) | ||
|
||
for (includer, includees) in includes: | ||
includees = map(lambda t: t[1], includees) | ||
for h in sorted(set(includees)): | ||
if h.is_public() and not h.is_C_compatibility(): | ||
print(f"{includer} {h}") | ||
|
||
def main(argv): | ||
parser = argparse.ArgumentParser( | ||
description=""" | ||
Given a list of headers produced by --trace-includes, produce a list of libc++ headers in that output. | ||
Note that -fshow-skipped-includes must also be passed to the compiler in order to get sufficient | ||
information for this script to run. | ||
The output of this script is provided in space-delimited CSV format where each line contains: | ||
<header performing inclusion> <header being included> | ||
""") | ||
parser.add_argument("inputs", type=argparse.FileType("r"), nargs='+', default=None, | ||
help="One or more files containing the result of --trace-includes") | ||
args = parser.parse_args(argv) | ||
|
||
includes = [line for file in args.inputs for line in parse_file(file)] | ||
print_csv(includes) | ||
|
||
if __name__ == "__main__": | ||
main(sys.argv[1:]) |
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.