Skip to content

Commit

Permalink
[libc++] Rewrite the transitive header checking machinery (llvm#110554)
Browse files Browse the repository at this point in the history
Since we don't generate a full dependency graph of headers, we can
greatly simplify the script that parses the result of --trace-includes.

At the same time, we also unify the mechanism for detecting whether a
header is a public/C compat/internal/etc header with the existing
mechanism in header_information.py.

As a drive-by, this fixes the headers_in_modulemap.sh.py test, which had
been disabled by mistake because it used its own way of determining
the list of libc++ headers. By consistently using header_information.py
to get that information, problems like this shouldn't happen anymore.

This should also unblock llvm#110303, which was blocked because of
a brittle implementation of the transitive includes check which broke
when the repository was cloned at a path like /path/__something/more.
  • Loading branch information
ldionne authored and EricWF committed Oct 22, 2024
1 parent b38a802 commit 7879fad
Show file tree
Hide file tree
Showing 8 changed files with 297 additions and 298 deletions.
2 changes: 1 addition & 1 deletion libcxx/test/libcxx/header_inclusions.gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from libcxx.header_information import lit_header_restrictions, public_headers, mandatory_inclusions

for header in public_headers:
header_guard = lambda h: f"_LIBCPP_{h.upper().replace('.', '_').replace('/', '_')}"
header_guard = lambda h: f"_LIBCPP_{str(h).upper().replace('.', '_').replace('/', '_')}"

# <cassert> has no header guards
if header == 'cassert':
Expand Down
22 changes: 6 additions & 16 deletions libcxx/test/libcxx/headers_in_modulemap.sh.py
Original file line number Diff line number Diff line change
@@ -1,25 +1,15 @@
# RUN: %{python} %s %{libcxx-dir}/utils %{include-dir}
# RUN: %{python} %s %{libcxx-dir}/utils

import sys

sys.path.append(sys.argv[1])
from libcxx.header_information import all_headers, libcxx_include

import pathlib
import sys
from libcxx.header_information import is_modulemap_header, is_header

headers = list(pathlib.Path(sys.argv[2]).rglob("*"))
modulemap = open(f"{sys.argv[2]}/module.modulemap").read()
with open(libcxx_include / "module.modulemap") as f:
modulemap = f.read()

isHeaderMissing = False

for header in headers:
if not is_header(header):
continue

header = header.relative_to(pathlib.Path(sys.argv[2])).as_posix()

if not is_modulemap_header(header):
for header in all_headers:
if not header.is_in_modulemap():
continue

if not str(header) in modulemap:
Expand Down
12 changes: 6 additions & 6 deletions libcxx/test/libcxx/transitive_includes.gen.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,10 @@

all_traces = []
for header in sorted(public_headers):
if header.endswith(".h"): # Skip C compatibility or detail headers
if header.is_C_compatibility() or header.is_internal():
continue

normalized_header = re.sub("/", "_", header)
normalized_header = re.sub("/", "_", str(header))
print(
f"""\
// RUN: echo "#include <{header}>" | %{{cxx}} -xc++ - %{{flags}} %{{compile_flags}} --trace-includes -fshow-skipped-includes --preprocess > /dev/null 2> %t/trace-includes.{normalized_header}.txt
Expand All @@ -55,17 +55,17 @@

print(
f"""\
// RUN: %{{python}} %{{libcxx-dir}}/test/libcxx/transitive_includes_to_csv.py {' '.join(all_traces)} > %{{libcxx-dir}}/test/libcxx/transitive_includes/%{{cxx_std}}.csv
// RUN: %{{python}} %{{libcxx-dir}}/test/libcxx/transitive_includes/to_csv.py {' '.join(all_traces)} > %{{libcxx-dir}}/test/libcxx/transitive_includes/%{{cxx_std}}.csv
"""
)

else:
for header in public_headers:
if header.endswith(".h"): # Skip C compatibility or detail headers
if header.is_C_compatibility() or header.is_internal():
continue

# Escape slashes for the awk command below
escaped_header = header.replace("/", "\\/")
escaped_header = str(header).replace("/", "\\/")

print(
f"""\
Expand All @@ -92,7 +92,7 @@
// RUN: mkdir %t
// RUN: %{{cxx}} %s %{{flags}} %{{compile_flags}} --trace-includes -fshow-skipped-includes --preprocess > /dev/null 2> %t/trace-includes.txt
// RUN: %{{python}} %{{libcxx-dir}}/test/libcxx/transitive_includes_to_csv.py %t/trace-includes.txt > %t/actual_transitive_includes.csv
// RUN: %{{python}} %{{libcxx-dir}}/test/libcxx/transitive_includes/to_csv.py %t/trace-includes.txt > %t/actual_transitive_includes.csv
// RUN: cat %{{libcxx-dir}}/test/libcxx/transitive_includes/%{{cxx_std}}.csv | awk '/^{escaped_header} / {{ print }}' > %t/expected_transitive_includes.csv
// RUN: diff -w %t/expected_transitive_includes.csv %t/actual_transitive_includes.csv
#include <{header}>
Expand Down
120 changes: 120 additions & 0 deletions libcxx/test/libcxx/transitive_includes/to_csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
#!/usr/bin/env python
# ===----------------------------------------------------------------------===##
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# ===----------------------------------------------------------------------===##

from typing import List, Tuple, Optional
import argparse
import io
import itertools
import os
import pathlib
import re
import sys

# This script is located at <libcxx>/test/libcxx/transitive_includes/to_csv.py,
# so taking the parent directory four times yields the libcxx root directory.
# Its utils/ directory is appended to the module search path so that the
# libcxx.header_information module can be imported below.
libcxx_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
sys.path.append(os.path.join(libcxx_root, "utils"))
from libcxx.header_information import Header

def parse_line(line: str) -> Tuple[int, str]:
    """
    Parse a single line of --trace-includes output.

    A valid line starts with one or more periods, followed by a single space
    and the name of the file being included.

    Returns the inclusion level and the raw file name being included.
    Raises ValueError if the line does not have that shape.
    """
    match = re.match(r"(\.+) (.+)", line)
    if not match:
        # ValueError is used because `ArgumentError` is not a builtin name:
        # referencing it would fail with a NameError and hide this message.
        raise ValueError(f"Line {line} contains invalid data.")

    # The number of periods in front of the header name is the nesting level of
    # that header.
    return (len(match.group(1)), match.group(2))

def make_cxx_v1_relative(header: str) -> Optional[str]:
    """
    Strip everything up to and including the `c++/v<N>/` directory from a path.

    Returns the remainder of the path after that directory, or None when the
    path contains no such directory. The presence of that directory is the
    heuristic used to decide whether a header is a libc++ header.
    """
    # A path separator is either a forward slash or, on Windows, a backslash.
    # Clang prints a backslash escaped as two consecutive backslashes, and each
    # of them must itself be escaped in the regular expression. Raw strings
    # avoid yet another level of escaping in the Python literals.
    separator = r"(?:/|\\\\)"
    pattern = r"^.*c\+\+" + separator + r"v[0-9]+" + separator + r"(.+)$"
    found = re.match(pattern, header)
    return found.group(1) if found else None

def parse_file(file: io.TextIOBase) -> List[Tuple[Header, Header]]:
    """
    Extract the transitive libc++ includes from --trace-includes output.

    Every line of the file is parsed with parse_line(). Headers whose path is
    not under a c++/v<N> directory are ignored. The result is a list of
    (top-level includer, included header) pairs.
    """
    pairs = []
    top_level = None
    for raw_line in file:
        depth, path = parse_line(raw_line)
        libcxx_path = make_cxx_v1_relative(path)

        # Skip anything that is not a libc++ header.
        if libcxx_path is None:
            continue

        if depth != 1:
            # A nested header is attributed to the most recent top-level includer.
            assert top_level is not None
            pairs.append((top_level, Header(libcxx_path)))
        else:
            # A first-level header becomes the includer for the entries that follow.
            # There's usually exactly one, except if the compiler is passed a file
            # with `-include`.
            top_level = Header(libcxx_path)
    return pairs

def print_csv(includes: List[Tuple[Header, Header]]) -> None:
    """
    Write the transitive includes to standard output as space-delimited CSV.

    Pairs are sorted and grouped by includer; within each group the included
    headers are de-duplicated and sorted. Only included headers that are public
    and are not C compatibility headers are printed, one pair per line.
    """
    def includer_of(pair):
        return pair[0]

    # groupby() only merges adjacent entries, so sort by the same key first.
    ordered = sorted(includes, key=includer_of)
    for includer, group in itertools.groupby(ordered, key=includer_of):
        unique_includees = {pair[1] for pair in group}
        for includee in sorted(unique_includees):
            if not includee.is_public() or includee.is_C_compatibility():
                continue
            print(f"{includer} {includee}")

def main(argv):
    """
    Command-line entry point.

    Parses `argv` (the arguments without the program name), reads every input
    file with parse_file() and prints the combined result with print_csv().
    """
    parser = argparse.ArgumentParser(
        description="""
Given a list of headers produced by --trace-includes, produce a list of libc++ headers in that output.
Note that -fshow-skipped-includes must also be passed to the compiler in order to get sufficient
information for this script to run.
The output of this script is provided in space-delimited CSV format where each line contains:
<header performing inclusion> <header being included>
""")
    parser.add_argument(
        "inputs",
        type=argparse.FileType("r"),
        nargs='+',
        default=None,
        help="One or more files containing the result of --trace-includes",
    )
    parsed = parser.parse_args(argv)

    # Gather the pairs from every input before printing, so that grouping and
    # sorting apply across all files rather than per file.
    collected = []
    for trace in parsed.inputs:
        collected.extend(parse_file(trace))
    print_csv(collected)


if __name__ == "__main__":
    main(sys.argv[1:])
147 changes: 0 additions & 147 deletions libcxx/test/libcxx/transitive_includes_to_csv.py

This file was deleted.

2 changes: 1 addition & 1 deletion libcxx/utils/generate_iwyu_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def main(argv: typing.List[str]):

mappings = [] # Pairs of (header, public_header)
for header in libcxx.header_information.all_headers:
public_headers = IWYU_mapping(header)
public_headers = IWYU_mapping(str(header))
if public_headers is not None:
mappings.extend((header, public) for public in public_headers)

Expand Down
14 changes: 3 additions & 11 deletions libcxx/utils/generate_libcxx_cppm_in.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,11 @@
import os.path
import sys

from libcxx.header_information import module_c_headers
from libcxx.header_information import module_headers
from libcxx.header_information import header_restrictions
from libcxx.header_information import headers_not_available
from libcxx.header_information import module_c_headers, module_headers, header_restrictions, headers_not_available, libcxx_root


def write_file(module):
libcxx_module_directory = os.path.join(
os.path.dirname(os.path.dirname(os.path.realpath(__file__))), "modules"
)
with open(
os.path.join(libcxx_module_directory, f"{module}.cppm.in"), "w"
) as module_cpp_in:
with open(libcxx_root / "modules" / f"{module}.cppm.in", "w") as module_cpp_in:
module_cpp_in.write(
"""\
// -*- C++ -*-
Expand All @@ -45,7 +37,7 @@ def write_file(module):
// and the headers of Table 25: C++ headers for C library facilities [tab:headers.cpp.c]
"""
)
for header in module_headers if module == "std" else module_c_headers:
for header in sorted(module_headers if module == "std" else module_c_headers):
if header in header_restrictions:
module_cpp_in.write(
f"""\
Expand Down
Loading

0 comments on commit 7879fad

Please sign in to comment.